1 /*
2  * Copyright (c) 2001-2013, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #ifdef IX_RSS_DEBUG
75 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
76 do { \
77 	if (sc->rss_debug >= lvl) \
78 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
79 } while (0)
80 #else	/* !IX_RSS_DEBUG */
81 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
82 #endif	/* IX_RSS_DEBUG */
83 
84 #define IX_NAME			"Intel(R) PRO/10GbE "
85 #define IX_DEVICE(id) \
86 	{ IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
87 #define IX_DEVICE_NULL		{ 0, 0, NULL }
88 
89 static struct ix_device {
90 	uint16_t	vid;
91 	uint16_t	did;
92 	const char	*desc;
93 } ix_devices[] = {
94 	IX_DEVICE(82598AF_DUAL_PORT),
95 	IX_DEVICE(82598AF_SINGLE_PORT),
96 	IX_DEVICE(82598EB_CX4),
97 	IX_DEVICE(82598AT),
98 	IX_DEVICE(82598AT2),
99 	IX_DEVICE(82598),
100 	IX_DEVICE(82598_DA_DUAL_PORT),
101 	IX_DEVICE(82598_CX4_DUAL_PORT),
102 	IX_DEVICE(82598EB_XF_LR),
103 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
104 	IX_DEVICE(82598EB_SFP_LOM),
105 	IX_DEVICE(82599_KX4),
106 	IX_DEVICE(82599_KX4_MEZZ),
107 	IX_DEVICE(82599_SFP),
108 	IX_DEVICE(82599_XAUI_LOM),
109 	IX_DEVICE(82599_CX4),
110 	IX_DEVICE(82599_T3_LOM),
111 	IX_DEVICE(82599_COMBO_BACKPLANE),
112 	IX_DEVICE(82599_BACKPLANE_FCOE),
113 	IX_DEVICE(82599_SFP_SF2),
114 	IX_DEVICE(82599_SFP_FCOE),
115 	IX_DEVICE(82599EN_SFP),
116 	IX_DEVICE(82599_SFP_SF_QP),
117 	IX_DEVICE(X540T),
118 
119 	/* required last entry */
120 	IX_DEVICE_NULL
121 };
122 
123 static int	ix_probe(device_t);
124 static int	ix_attach(device_t);
125 static int	ix_detach(device_t);
126 static int	ix_shutdown(device_t);
127 
128 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
129 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
130 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
131 #ifdef INVARIANTS
132 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
133 		    boolean_t);
134 #endif
135 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
136 static void	ix_watchdog(struct ifaltq_subque *);
137 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
138 static void	ix_init(void *);
139 static void	ix_stop(struct ix_softc *);
140 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
141 static int	ix_media_change(struct ifnet *);
142 static void	ix_timer(void *);
143 #ifdef IFPOLL_ENABLE
144 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
145 static void	ix_npoll_rx(struct ifnet *, void *, int);
146 static void	ix_npoll_tx(struct ifnet *, void *, int);
147 static void	ix_npoll_status(struct ifnet *);
148 #endif
149 
150 static void	ix_add_sysctl(struct ix_softc *);
151 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
152 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
153 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
154 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
155 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
156 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
157 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
158 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
159 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
160 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
161 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS);
164 #ifdef foo
165 static int	ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
166 #endif
167 #if 0
168 static void     ix_add_hw_stats(struct ix_softc *);
169 #endif
170 #ifdef IFPOLL_ENABLE
171 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
172 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
173 #endif
174 
175 static void	ix_slot_info(struct ix_softc *);
176 static int	ix_alloc_rings(struct ix_softc *);
177 static void	ix_free_rings(struct ix_softc *);
178 static void	ix_setup_ifp(struct ix_softc *);
179 static void	ix_setup_serialize(struct ix_softc *);
180 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
181 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
182 static void	ix_update_stats(struct ix_softc *);
183 
184 static void	ix_set_promisc(struct ix_softc *);
185 static void	ix_set_multi(struct ix_softc *);
186 static void	ix_set_vlan(struct ix_softc *);
187 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
188 
189 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
190 static void	ix_init_tx_ring(struct ix_tx_ring *);
191 static void	ix_free_tx_ring(struct ix_tx_ring *);
192 static int	ix_create_tx_ring(struct ix_tx_ring *);
193 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
194 static void	ix_init_tx_unit(struct ix_softc *);
195 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
196 		    uint16_t *, int *);
197 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
198 		    const struct mbuf *, uint32_t *, uint32_t *);
199 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
200 		    const struct mbuf *, uint32_t *, uint32_t *);
201 static void	ix_txeof(struct ix_tx_ring *, int);
202 
203 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
204 static int	ix_init_rx_ring(struct ix_rx_ring *);
205 static void	ix_free_rx_ring(struct ix_rx_ring *);
206 static int	ix_create_rx_ring(struct ix_rx_ring *);
207 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
208 static void	ix_init_rx_unit(struct ix_softc *);
209 #if 0
210 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
211 #endif
212 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
213 static void	ix_rxeof(struct ix_rx_ring *, int);
214 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
215 static void	ix_enable_rx_drop(struct ix_softc *);
216 static void	ix_disable_rx_drop(struct ix_softc *);
217 
218 static void	ix_alloc_msix(struct ix_softc *);
219 static void	ix_free_msix(struct ix_softc *, boolean_t);
220 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
221 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
222 static void	ix_setup_msix_eims(const struct ix_softc *, int,
223 		    uint32_t *, uint32_t *);
224 static int	ix_alloc_intr(struct ix_softc *);
225 static void	ix_free_intr(struct ix_softc *);
226 static int	ix_setup_intr(struct ix_softc *);
227 static void	ix_teardown_intr(struct ix_softc *, int);
228 static void	ix_enable_intr(struct ix_softc *);
229 static void	ix_disable_intr(struct ix_softc *);
230 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
231 static void	ix_set_eitr(struct ix_softc *, int, int);
232 static void	ix_intr_status(struct ix_softc *, uint32_t);
233 static void	ix_intr(void *);
234 static void	ix_msix_rxtx(void *);
235 static void	ix_msix_rx(void *);
236 static void	ix_msix_tx(void *);
237 static void	ix_msix_status(void *);
238 
239 static void	ix_config_link(struct ix_softc *);
240 static boolean_t ix_sfp_probe(struct ix_softc *);
241 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
242 static void	ix_setup_optics(struct ix_softc *);
243 static void	ix_update_link_status(struct ix_softc *);
244 static void	ix_handle_link(struct ix_softc *);
245 static void	ix_handle_mod(struct ix_softc *);
246 static void	ix_handle_msf(struct ix_softc *);
247 
248 /* XXX Shared code structure requires this for the moment */
249 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
250 
251 static device_method_t ix_methods[] = {
252 	/* Device interface */
253 	DEVMETHOD(device_probe,		ix_probe),
254 	DEVMETHOD(device_attach,	ix_attach),
255 	DEVMETHOD(device_detach,	ix_detach),
256 	DEVMETHOD(device_shutdown,	ix_shutdown),
257 	DEVMETHOD_END
258 };
259 
260 static driver_t ix_driver = {
261 	"ix",
262 	ix_methods,
263 	sizeof(struct ix_softc)
264 };
265 
266 static devclass_t ix_devclass;
267 
268 DECLARE_DUMMY_MODULE(if_ix);
269 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
270 
271 static int	ix_msi_enable = 1;
272 static int	ix_msix_enable = 1;
273 static int	ix_msix_agg_rxtx = 1;
274 static int	ix_rxr = 0;
275 static int	ix_txr = 0;
276 static int	ix_txd = IX_PERF_TXD;
277 static int	ix_rxd = IX_PERF_RXD;
278 static int	ix_unsupported_sfp = 0;
279 
280 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
281 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
282 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
283 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
284 TUNABLE_INT("hw.ix.txr", &ix_txr);
285 TUNABLE_INT("hw.ix.txd", &ix_txd);
286 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
287 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
288 
289 /*
290  * Smart speed setting, default to on.  This only works
291  * as a compile-time option right now, since it is set during attach;
292  * set this to 'ixgbe_smart_speed_off' to disable.
293  */
294 static const enum ixgbe_smart_speed ix_smart_speed =
295     ixgbe_smart_speed_on;
296 
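/*
 * Device probe: match the PCI vendor/device ID pair against the
 * ix_devices[] table and set the device description on a match.
 */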
297 static int
298 ix_probe(device_t dev)
299 {
300 	const struct ix_device *d;
301 	uint16_t vid, did;
302 
303 	vid = pci_get_vendor(dev);
304 	did = pci_get_device(dev);
305 
306 	for (d = ix_devices; d->desc != NULL; ++d) {
307 		if (vid == d->vid && did == d->did) {
308 			device_set_desc(dev, d->desc);
309 			return 0;
310 		}
311 	}
312 	return ENXIO;
313 }
314 
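/*
 * Device attach: read the PCI IDs, map the register BAR, size and
 * allocate the RX/TX rings and the interrupt, initialize the shared
 * code and EEPROM, and hook the driver into the network stack.
 */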
315 static int
316 ix_attach(device_t dev)
317 {
318 	struct ix_softc *sc = device_get_softc(dev);
319 	struct ixgbe_hw *hw;
320 	int error, ring_cnt_max;
321 	uint16_t csum;
322 	uint32_t ctrl_ext;
323 #ifdef IFPOLL_ENABLE
324 	int offset, offset_def;
325 #endif
326 
327 	sc->dev = sc->osdep.dev = dev;
328 	hw = &sc->hw;
329 
330 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
331 	    device_get_unit(dev));
332 	ifmedia_init(&sc->media, IFM_IMASK,
333 	    ix_media_change, ix_media_status);
334 
335 	/* Save frame size */
336 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
337 
338 	callout_init_mp(&sc->timer);
339 	lwkt_serialize_init(&sc->main_serialize);
340 
341 	/*
342 	 * Save off the information about this board
343 	 */
344 	hw->vendor_id = pci_get_vendor(dev);
345 	hw->device_id = pci_get_device(dev);
346 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
347 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
348 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
349 
350 	ixgbe_set_mac_type(hw);
351 
352 	/* Pick up the 82599 and VF settings */
353 	if (hw->mac.type != ixgbe_mac_82598EB)
354 		hw->phy.smart_speed = ix_smart_speed;
355 
356 	/* Enable bus mastering */
357 	pci_enable_busmaster(dev);
358 
359 	/*
360 	 * Allocate IO memory
361 	 */
362 	sc->mem_rid = PCIR_BAR(0);
363 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
364 	    &sc->mem_rid, RF_ACTIVE);
365 	if (sc->mem_res == NULL) {
366 		device_printf(dev, "Unable to allocate bus resource: memory\n");
367 		error = ENXIO;
368 		goto failed;
369 	}
370 
371 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
372 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
373 
374 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
375 	sc->hw.back = &sc->osdep;
376 
377 	/*
378 	 * Configure total supported RX/TX ring count
379 	 */
380 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
381 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
382 	sc->rx_ring_inuse = sc->rx_ring_cnt;
383 
384 	switch (hw->mac.type) {
385 	case ixgbe_mac_82598EB:
386 		ring_cnt_max = IX_MAX_TXRING_82598;
387 		break;
388 
389 	case ixgbe_mac_82599EB:
390 		ring_cnt_max = IX_MAX_TXRING_82599;
391 		break;
392 
393 	case ixgbe_mac_X540:
394 		ring_cnt_max = IX_MAX_TXRING_X540;
395 		break;
396 
397 	default:
398 		ring_cnt_max = 1;
399 		break;
400 	}
401 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
402 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
403 	sc->tx_ring_inuse = sc->tx_ring_cnt;
404 
405 	/* Allocate TX/RX rings */
406 	error = ix_alloc_rings(sc);
407 	if (error)
408 		goto failed;
409 
410 #ifdef IFPOLL_ENABLE
411 	/*
412 	 * NPOLLING RX CPU offset
413 	 */
414 	if (sc->rx_ring_cnt == ncpus2) {
415 		offset = 0;
416 	} else {
417 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
418 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
419 		if (offset >= ncpus2 ||
420 		    offset % sc->rx_ring_cnt != 0) {
421 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
422 			    offset, offset_def);
423 			offset = offset_def;
424 		}
425 	}
426 	sc->rx_npoll_off = offset;
427 
428 	/*
429 	 * NPOLLING TX CPU offset
430 	 */
431 	if (sc->tx_ring_cnt == ncpus2) {
432 		offset = 0;
433 	} else {
434 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
435 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
436 		if (offset >= ncpus2 ||
437 		    offset % sc->tx_ring_cnt != 0) {
438 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
439 			    offset, offset_def);
440 			offset = offset_def;
441 		}
442 	}
443 	sc->tx_npoll_off = offset;
444 #endif
445 
446 	/* Allocate interrupt */
447 	error = ix_alloc_intr(sc);
448 	if (error)
449 		goto failed;
450 
451 	/* Set up serializers */
452 	ix_setup_serialize(sc);
453 
454 	/* Allocate multicast array memory. */
455 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
456 	    M_DEVBUF, M_WAITOK);
457 
458 	/* Initialize the shared code */
459 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
460 	error = ixgbe_init_shared_code(hw);
461 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
462 		/*
463 		 * No optics in this port; ask the timer routine
464 		 * to probe for later insertion.
465 		 */
466 		sc->sfp_probe = TRUE;
467 		error = 0;
468 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
469 		device_printf(dev, "Unsupported SFP+ module detected!\n");
470 		error = EIO;
471 		goto failed;
472 	} else if (error) {
473 		device_printf(dev, "Unable to initialize the shared code\n");
474 		error = EIO;
475 		goto failed;
476 	}
477 
478 	/* Make sure we have a good EEPROM before we read from it */
479 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
480 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
481 		error = EIO;
482 		goto failed;
483 	}
484 
485 	error = ixgbe_init_hw(hw);
486 	if (error == IXGBE_ERR_EEPROM_VERSION) {
487 		device_printf(dev, "Pre-production device detected\n");
488 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
489 		device_printf(dev, "Unsupported SFP+ Module\n");
490 		error = EIO;
491 		goto failed;
492 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
493 		device_printf(dev, "No SFP+ Module found\n");
494 	}
495 
496 	/* Detect and set physical type */
497 	ix_setup_optics(sc);
498 
499 	/* Setup OS specific network interface */
500 	ix_setup_ifp(sc);
501 
502 	/* Add sysctl tree */
503 	ix_add_sysctl(sc);
504 
505 	error = ix_setup_intr(sc);
506 	if (error) {
507 		ether_ifdetach(&sc->arpcom.ac_if);
508 		goto failed;
509 	}
510 
511 	/* Initialize statistics */
512 	ix_update_stats(sc);
513 
514 	/*
515 	 * Check PCIE slot type/speed/width
516 	 */
517 	ix_slot_info(sc);
518 
519 	/* Set an initial default flow control value */
520 	sc->fc = ixgbe_fc_full;
521 
522 	/* Let hardware know driver is loaded */
523 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
524 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
525 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
526 
527 	return 0;
528 failed:
529 	ix_detach(dev);
530 	return error;
531 }
532 
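/*
 * Device detach: stop the hardware, tear down the interrupt and the
 * network interface, then release bus resources, rings and other
 * driver memory.
 */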
533 static int
534 ix_detach(device_t dev)
535 {
536 	struct ix_softc *sc = device_get_softc(dev);
537 
538 	if (device_is_attached(dev)) {
539 		struct ifnet *ifp = &sc->arpcom.ac_if;
540 		uint32_t ctrl_ext;
541 
542 		ifnet_serialize_all(ifp);
543 
544 		ix_stop(sc);
545 		ix_teardown_intr(sc, sc->intr_cnt);
546 
547 		ifnet_deserialize_all(ifp);
548 
549 		callout_terminate(&sc->timer);
550 		ether_ifdetach(ifp);
551 
552 		/* Let hardware know driver is unloading */
553 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
554 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
555 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
556 	}
557 
558 	ifmedia_removeall(&sc->media);
559 	bus_generic_detach(dev);
560 
561 	ix_free_intr(sc);
562 
563 	if (sc->msix_mem_res != NULL) {
564 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
565 		    sc->msix_mem_res);
566 	}
567 	if (sc->mem_res != NULL) {
568 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
569 		    sc->mem_res);
570 	}
571 
572 	ix_free_rings(sc);
573 
574 	if (sc->mta != NULL)
575 		kfree(sc->mta, M_DEVBUF);
576 	if (sc->serializes != NULL)
577 		kfree(sc->serializes, M_DEVBUF);
578 
579 	return 0;
580 }
581 
582 static int
583 ix_shutdown(device_t dev)
584 {
585 	struct ix_softc *sc = device_get_softc(dev);
586 	struct ifnet *ifp = &sc->arpcom.ac_if;
587 
588 	ifnet_serialize_all(ifp);
589 	ix_stop(sc);
590 	ifnet_deserialize_all(ifp);
591 
592 	return 0;
593 }
594 
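/*
 * Transmit start for one subqueue: dequeue and encapsulate mbufs into
 * TX descriptors; the tail register (TDT) is only written once every
 * tx_wreg_nsegs segments and once more when the loop ends.
 */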
595 static void
596 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
597 {
598 	struct ix_softc *sc = ifp->if_softc;
599 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
600 	int idx = -1;
601 	uint16_t nsegs;
602 
603 	KKASSERT(txr->tx_ifsq == ifsq);
604 	ASSERT_SERIALIZED(&txr->tx_serialize);
605 
606 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
607 		return;
608 
609 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
610 		ifsq_purge(ifsq);
611 		return;
612 	}
613 
614 	while (!ifsq_is_empty(ifsq)) {
615 		struct mbuf *m_head;
616 
617 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
618 			ifsq_set_oactive(ifsq);
619 			txr->tx_watchdog.wd_timer = 5;
620 			break;
621 		}
622 
623 		m_head = ifsq_dequeue(ifsq);
624 		if (m_head == NULL)
625 			break;
626 
627 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
628 			IFNET_STAT_INC(ifp, oerrors, 1);
629 			continue;
630 		}
631 
632 		/*
633 		 * TX interrupts are aggressively aggregated, so increasing
634 		 * opackets at TX interrupt time would make the opackets
635 		 * statistics vastly inaccurate; do the opackets increment
636 		 * here instead.
637 		 */
638 		IFNET_STAT_INC(ifp, opackets, 1);
639 
640 		if (nsegs >= txr->tx_wreg_nsegs) {
641 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
642 			nsegs = 0;
643 			idx = -1;
644 		}
645 
646 		ETHER_BPF_MTAP(ifp, m_head);
647 	}
648 	if (idx >= 0)
649 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
650 }
651 
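/*
 * Ioctl entry point: handles MTU, interface flags, multicast list,
 * media and capability changes; everything else is passed on to
 * ether_ioctl().
 */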
652 static int
653 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
654 {
655 	struct ix_softc *sc = ifp->if_softc;
656 	struct ifreq *ifr = (struct ifreq *) data;
657 	int error = 0, mask, reinit;
658 
659 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
660 
661 	switch (command) {
662 	case SIOCSIFMTU:
663 		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
664 			error = EINVAL;
665 		} else {
666 			ifp->if_mtu = ifr->ifr_mtu;
667 			sc->max_frame_size =
668 			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
669 			ix_init(sc);
670 		}
671 		break;
672 
673 	case SIOCSIFFLAGS:
674 		if (ifp->if_flags & IFF_UP) {
675 			if (ifp->if_flags & IFF_RUNNING) {
676 				if ((ifp->if_flags ^ sc->if_flags) &
677 				    (IFF_PROMISC | IFF_ALLMULTI))
678 					ix_set_promisc(sc);
679 			} else {
680 				ix_init(sc);
681 			}
682 		} else if (ifp->if_flags & IFF_RUNNING) {
683 			ix_stop(sc);
684 		}
685 		sc->if_flags = ifp->if_flags;
686 		break;
687 
688 	case SIOCADDMULTI:
689 	case SIOCDELMULTI:
690 		if (ifp->if_flags & IFF_RUNNING) {
691 			ix_disable_intr(sc);
692 			ix_set_multi(sc);
693 #ifdef IFPOLL_ENABLE
694 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
695 #endif
696 				ix_enable_intr(sc);
697 		}
698 		break;
699 
700 	case SIOCSIFMEDIA:
701 	case SIOCGIFMEDIA:
702 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
703 		break;
704 
705 	case SIOCSIFCAP:
706 		reinit = 0;
707 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
708 		if (mask & IFCAP_RXCSUM) {
709 			ifp->if_capenable ^= IFCAP_RXCSUM;
710 			reinit = 1;
711 		}
712 		if (mask & IFCAP_VLAN_HWTAGGING) {
713 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
714 			reinit = 1;
715 		}
716 		if (mask & IFCAP_TXCSUM) {
717 			ifp->if_capenable ^= IFCAP_TXCSUM;
718 			if (ifp->if_capenable & IFCAP_TXCSUM)
719 				ifp->if_hwassist |= CSUM_OFFLOAD;
720 			else
721 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
722 		}
723 		if (mask & IFCAP_TSO) {
724 			ifp->if_capenable ^= IFCAP_TSO;
725 			if (ifp->if_capenable & IFCAP_TSO)
726 				ifp->if_hwassist |= CSUM_TSO;
727 			else
728 				ifp->if_hwassist &= ~CSUM_TSO;
729 		}
730 		if (mask & IFCAP_RSS)
731 			ifp->if_capenable ^= IFCAP_RSS;
732 		if (reinit && (ifp->if_flags & IFF_RUNNING))
733 			ix_init(sc);
734 		break;
735 
736 #if 0
737 	case SIOCGI2C:
738 	{
739 		struct ixgbe_i2c_req	i2c;
740 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
741 		if (error)
742 			break;
743 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
744 			error = EINVAL;
745 			break;
746 		}
747 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
748 		    i2c.dev_addr, i2c.data);
749 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
750 		break;
751 	}
752 #endif
753 
754 	default:
755 		error = ether_ioctl(ifp, command, data);
756 		break;
757 	}
758 	return error;
759 }
760 
761 #define IXGBE_MHADD_MFS_SHIFT 16
762 
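/*
 * Bring the interface (back) up: program the MAC address, TX/RX units,
 * VLAN filtering, interrupt vectors (IVARs) and moderation, flow
 * control and link, then mark the interface running and restart the
 * periodic timer.
 */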
763 static void
764 ix_init(void *xsc)
765 {
766 	struct ix_softc *sc = xsc;
767 	struct ifnet *ifp = &sc->arpcom.ac_if;
768 	struct ixgbe_hw *hw = &sc->hw;
769 	uint32_t rxpb, frame, size, tmp;
770 	uint32_t gpie, rxctrl;
771 	int i, error;
772 	boolean_t polling;
773 
774 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
775 
776 	ix_stop(sc);
777 
778 	polling = FALSE;
779 #ifdef IFPOLL_ENABLE
780 	if (ifp->if_flags & IFF_NPOLLING)
781 		polling = TRUE;
782 #endif
783 
784 	/* Configure # of used RX/TX rings */
785 	ix_set_ring_inuse(sc, polling);
786 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
787 
788 	/* Get the latest MAC address; the user can use an LAA */
789 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
790 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
791 	hw->addr_ctrl.rar_used_count = 1;
792 
793 	/* Prepare transmit descriptors and buffers */
794 	for (i = 0; i < sc->tx_ring_inuse; ++i)
795 		ix_init_tx_ring(&sc->tx_rings[i]);
796 
797 	ixgbe_init_hw(hw);
798 	ix_init_tx_unit(sc);
799 
800 	/* Setup Multicast table */
801 	ix_set_multi(sc);
802 
803 	/* Prepare receive descriptors and buffers */
804 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
805 		error = ix_init_rx_ring(&sc->rx_rings[i]);
806 		if (error) {
807 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
808 			ix_stop(sc);
809 			return;
810 		}
811 	}
812 
813 	/* Configure RX settings */
814 	ix_init_rx_unit(sc);
815 
816 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
817 
818 	/* Enable Fan Failure Interrupt */
819 	gpie |= IXGBE_SDP1_GPIEN;
820 
821 	/* Add for Module detection */
822 	if (hw->mac.type == ixgbe_mac_82599EB)
823 		gpie |= IXGBE_SDP2_GPIEN;
824 
825 	/* Thermal Failure Detection */
826 	if (hw->mac.type == ixgbe_mac_X540)
827 		gpie |= IXGBE_SDP0_GPIEN;
828 
829 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
830 		/* Enable Enhanced MSIX mode */
831 		gpie |= IXGBE_GPIE_MSIX_MODE;
832 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
833 		    IXGBE_GPIE_OCD;
834 	}
835 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
836 
837 	/* Set MTU size */
838 	if (ifp->if_mtu > ETHERMTU) {
839 		uint32_t mhadd;
840 
841 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
842 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
843 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
844 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
845 	}
846 
847 	/*
848 	 * Enable TX rings
849 	 */
850 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
851 		uint32_t txdctl;
852 
853 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
854 		txdctl |= IXGBE_TXDCTL_ENABLE;
855 
856 		/*
857 		 * Set WTHRESH to 0, since TX head write-back is used
858 		 */
859 		txdctl &= ~(0x7f << 16);
860 
861 		/*
862 		 * When the internal queue falls below PTHRESH (32),
863 		 * start prefetching as long as there are at least
864 		 * HTHRESH (1) buffers ready. The values are taken
865 		 * from the Intel Linux driver 3.8.21.
866 		 * Prefetching enables TX line rate even with 1 queue.
867 		 */
868 		txdctl |= (32 << 0) | (1 << 8);
869 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
870 	}
871 
872 	/*
873 	 * Enable RX rings
874 	 */
875 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
876 		uint32_t rxdctl;
877 		int k;
878 
879 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
880 		if (hw->mac.type == ixgbe_mac_82598EB) {
881 			/*
882 			 * PTHRESH = 21
883 			 * HTHRESH = 4
884 			 * WTHRESH = 8
885 			 */
886 			rxdctl &= ~0x3FFFFF;
887 			rxdctl |= 0x080420;
888 		}
889 		rxdctl |= IXGBE_RXDCTL_ENABLE;
890 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
891 		for (k = 0; k < 10; ++k) {
892 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
893 			    IXGBE_RXDCTL_ENABLE)
894 				break;
895 			else
896 				msec_delay(1);
897 		}
898 		wmb();
899 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
900 		    sc->rx_rings[0].rx_ndesc - 1);
901 	}
902 
903 	/* Set up VLAN support and filter */
904 	ix_set_vlan(sc);
905 
906 	/* Enable Receive engine */
907 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
908 	if (hw->mac.type == ixgbe_mac_82598EB)
909 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
910 	rxctrl |= IXGBE_RXCTRL_RXEN;
911 	ixgbe_enable_rx_dma(hw, rxctrl);
912 
913 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
914 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
915 
916 		if (txr->tx_intr_vec >= 0) {
917 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
918 		} else {
919 			/*
920 			 * An unconfigured TX interrupt vector can only
921 			 * happen with MSI-X.
922 			 */
923 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
924 			    ("TX intr vector is not set"));
925 			KASSERT(i < sc->rx_ring_inuse,
926 			    ("invalid TX ring %d, no piggyback RX ring", i));
927 			KASSERT(sc->rx_rings[i].rx_txr == txr,
928 			    ("RX ring %d piggybacked TX ring mismatch", i));
929 			if (bootverbose)
930 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
931 		}
932 	}
933 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
934 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
935 
936 		KKASSERT(rxr->rx_intr_vec >= 0);
937 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
938 		if (rxr->rx_txr != NULL) {
939 			/*
940 			 * Piggyback the TX ring interrupt onto the RX
941 			 * ring interrupt vector.
942 			 */
943 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
944 			    ("piggybacked TX ring configured intr vector"));
945 			KASSERT(rxr->rx_txr->tx_idx == i,
946 			    ("RX ring %d piggybacked TX ring %u",
947 			     i, rxr->rx_txr->tx_idx));
948 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
949 			if (bootverbose) {
950 				if_printf(ifp, "IVAR RX ring %d piggybacks "
951 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
952 			}
953 		}
954 	}
955 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
956 		/* Set up the status MSI-X vector; it uses fixed entry 1 */
957 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
958 
959 		/* Set up auto-mask for TX and RX rings */
960 		if (hw->mac.type == ixgbe_mac_82598EB) {
961 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
962 		} else {
963 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
964 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
965 		}
966 	} else {
967 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
968 	}
969 	for (i = 0; i < sc->intr_cnt; ++i)
970 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
971 
972 	/*
973 	 * Check on any SFP devices that need to be kick-started
974 	 */
975 	if (hw->phy.type == ixgbe_phy_none) {
976 		error = hw->phy.ops.identify(hw);
977 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
978 			if_printf(ifp,
979 			    "Unsupported SFP+ module type was detected.\n");
980 			/* XXX stop */
981 			return;
982 		}
983 	}
984 
985 	/* Config/Enable Link */
986 	ix_config_link(sc);
987 
988 	/*
989 	 * Hardware Packet Buffer & Flow Control setup
990 	 */
991 	frame = sc->max_frame_size;
992 
993 	/* Calculate High Water */
994 	if (hw->mac.type == ixgbe_mac_X540)
995 		tmp = IXGBE_DV_X540(frame, frame);
996 	else
997 		tmp = IXGBE_DV(frame, frame);
998 	size = IXGBE_BT2KB(tmp);
999 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1000 	hw->fc.high_water[0] = rxpb - size;
1001 
1002 	/* Now calculate Low Water */
1003 	if (hw->mac.type == ixgbe_mac_X540)
1004 		tmp = IXGBE_LOW_DV_X540(frame);
1005 	else
1006 		tmp = IXGBE_LOW_DV(frame);
1007 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1008 
1009 	hw->fc.requested_mode = sc->fc;
1010 	hw->fc.pause_time = IX_FC_PAUSE;
1011 	hw->fc.send_xon = TRUE;
1012 
1013 	/* Initialize the FC settings */
1014 	ixgbe_start_hw(hw);
1015 
1016 	/*
1017 	 * Only enable interrupts if we are not polling; make sure
1018 	 * they are off otherwise.
1019 	 */
1020 	if (polling)
1021 		ix_disable_intr(sc);
1022 	else
1023 		ix_enable_intr(sc);
1024 
1025 	ifp->if_flags |= IFF_RUNNING;
1026 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1027 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1028 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1029 	}
1030 
1031 	ix_set_timer_cpuid(sc, polling);
1032 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1033 }
1034 
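/*
 * Legacy/MSI interrupt handler: service RX ring 0/1 and TX ring 0
 * according to EICR, dispatch status causes, then re-enable the
 * interrupt through EIMS.
 */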
1035 static void
1036 ix_intr(void *xsc)
1037 {
1038 	struct ix_softc *sc = xsc;
1039 	struct ixgbe_hw	*hw = &sc->hw;
1040 	uint32_t eicr;
1041 
1042 	ASSERT_SERIALIZED(&sc->main_serialize);
1043 
1044 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1045 	if (eicr == 0) {
1046 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1047 		return;
1048 	}
1049 
1050 	if (eicr & IX_RX0_INTR_MASK) {
1051 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1052 
1053 		lwkt_serialize_enter(&rxr->rx_serialize);
1054 		ix_rxeof(rxr, -1);
1055 		lwkt_serialize_exit(&rxr->rx_serialize);
1056 	}
1057 	if (eicr & IX_RX1_INTR_MASK) {
1058 		struct ix_rx_ring *rxr;
1059 
1060 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1061 		rxr = &sc->rx_rings[1];
1062 
1063 		lwkt_serialize_enter(&rxr->rx_serialize);
1064 		ix_rxeof(rxr, -1);
1065 		lwkt_serialize_exit(&rxr->rx_serialize);
1066 	}
1067 
1068 	if (eicr & IX_TX_INTR_MASK) {
1069 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1070 
1071 		lwkt_serialize_enter(&txr->tx_serialize);
1072 		ix_txeof(txr, *(txr->tx_hdr));
1073 		if (!ifsq_is_empty(txr->tx_ifsq))
1074 			ifsq_devstart(txr->tx_ifsq);
1075 		lwkt_serialize_exit(&txr->tx_serialize);
1076 	}
1077 
1078 	if (__predict_false(eicr & IX_EICR_STATUS))
1079 		ix_intr_status(sc, eicr);
1080 
1081 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1082 }
1083 
1084 static void
1085 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1086 {
1087 	struct ix_softc *sc = ifp->if_softc;
1088 
1089 	ix_update_link_status(sc);
1090 
1091 	ifmr->ifm_status = IFM_AVALID;
1092 	ifmr->ifm_active = IFM_ETHER;
1093 
1094 	if (!sc->link_active) {
1095 		ifmr->ifm_active |= IFM_NONE;
1096 		return;
1097 	}
1098 
1099 	ifmr->ifm_status |= IFM_ACTIVE;
1100 
1101 	switch (sc->link_speed) {
1102 	case IXGBE_LINK_SPEED_100_FULL:
1103 		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1104 		break;
1105 	case IXGBE_LINK_SPEED_1GB_FULL:
1106 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1107 		break;
1108 	case IXGBE_LINK_SPEED_10GB_FULL:
1109 		ifmr->ifm_active |= sc->optics | IFM_FDX;
1110 		break;
1111 	default:
1112 		ifmr->ifm_active |= IFM_NONE;
1113 		break;
1114 	}
1115 }
1116 
1117 static int
1118 ix_media_change(struct ifnet *ifp)
1119 {
1120 	struct ix_softc *sc = ifp->if_softc;
1121 	struct ifmedia *ifm = &sc->media;
1122 
1123 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1124 		return EINVAL;
1125 
1126 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1127 	case IFM_AUTO:
1128 		sc->hw.phy.autoneg_advertised =
1129 		    IXGBE_LINK_SPEED_100_FULL |
1130 		    IXGBE_LINK_SPEED_1GB_FULL |
1131 		    IXGBE_LINK_SPEED_10GB_FULL;
1132 		break;
1133 	default:
1134 		if_printf(ifp, "Only auto media type\n");
1135 		return EINVAL;
1136 	}
1137 	return 0;
1138 }
1139 
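/*
 * Make sure the Ethernet/IP/TCP headers of a TSO frame are contiguous
 * in the first mbuf, pulling the chain up if necessary.
 */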
1140 static __inline int
1141 ix_tso_pullup(struct mbuf **mp)
1142 {
1143 	int hoff, iphlen, thoff;
1144 	struct mbuf *m;
1145 
1146 	m = *mp;
1147 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1148 
1149 	iphlen = m->m_pkthdr.csum_iphlen;
1150 	thoff = m->m_pkthdr.csum_thlen;
1151 	hoff = m->m_pkthdr.csum_lhlen;
1152 
1153 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1154 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1155 	KASSERT(hoff > 0, ("invalid ether hlen"));
1156 
1157 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1158 		m = m_pullup(m, hoff + iphlen + thoff);
1159 		if (m == NULL) {
1160 			*mp = NULL;
1161 			return ENOBUFS;
1162 		}
1163 		*mp = m;
1164 	}
1165 	return 0;
1166 }
1167 
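/*
 * Encapsulate one mbuf chain into TX descriptors.  Sets up an offload
 * context descriptor when needed, requests Report Status roughly every
 * tx_intr_nsegs descriptors and leaves the TDT update to the caller.
 */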
1168 static int
1169 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1170     uint16_t *segs_used, int *idx)
1171 {
1172 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1173 	int i, j, error, nsegs, first, maxsegs;
1174 	struct mbuf *m_head = *m_headp;
1175 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1176 	bus_dmamap_t map;
1177 	struct ix_tx_buf *txbuf;
1178 	union ixgbe_adv_tx_desc *txd = NULL;
1179 
1180 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1181 		error = ix_tso_pullup(m_headp);
1182 		if (__predict_false(error))
1183 			return error;
1184 		m_head = *m_headp;
1185 	}
1186 
1187 	/* Basic descriptor defines */
1188 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1189 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1190 
1191 	if (m_head->m_flags & M_VLANTAG)
1192 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1193 
1194 	/*
1195 	 * It is important to capture the first descriptor
1196 	 * used, because it will contain the index of the
1197 	 * one we tell the hardware to report back.
1198 	 */
1199 	first = txr->tx_next_avail;
1200 	txbuf = &txr->tx_buf[first];
1201 	map = txbuf->map;
1202 
1203 	/*
1204 	 * Map the packet for DMA.
1205 	 */
1206 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1207 	if (maxsegs > IX_MAX_SCATTER)
1208 		maxsegs = IX_MAX_SCATTER;
1209 
1210 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1211 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1212 	if (__predict_false(error)) {
1213 		m_freem(*m_headp);
1214 		*m_headp = NULL;
1215 		return error;
1216 	}
1217 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1218 
1219 	m_head = *m_headp;
1220 
1221 	/*
1222 	 * Set up the appropriate offload context if requested;
1223 	 * this may consume one TX descriptor.
1224 	 */
1225 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1226 		(*segs_used)++;
1227 		txr->tx_nsegs++;
1228 	}
1229 
1230 	*segs_used += nsegs;
1231 	txr->tx_nsegs += nsegs;
1232 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1233 		/*
1234 		 * Report Status (RS) is turned on every intr_nsegs
1235 		 * descriptors (roughly).
1236 		 */
1237 		txr->tx_nsegs = 0;
1238 		cmd_rs = IXGBE_TXD_CMD_RS;
1239 	}
1240 
1241 	i = txr->tx_next_avail;
1242 	for (j = 0; j < nsegs; j++) {
1243 		bus_size_t seglen;
1244 		bus_addr_t segaddr;
1245 
1246 		txbuf = &txr->tx_buf[i];
1247 		txd = &txr->tx_base[i];
1248 		seglen = segs[j].ds_len;
1249 		segaddr = htole64(segs[j].ds_addr);
1250 
1251 		txd->read.buffer_addr = segaddr;
1252 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1253 		    cmd_type_len | seglen);
1254 		txd->read.olinfo_status = htole32(olinfo_status);
1255 
1256 		if (++i == txr->tx_ndesc)
1257 			i = 0;
1258 	}
1259 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1260 
1261 	txr->tx_avail -= nsegs;
1262 	txr->tx_next_avail = i;
1263 
1264 	txbuf->m_head = m_head;
1265 	txr->tx_buf[first].map = txbuf->map;
1266 	txbuf->map = map;
1267 
1268 	/*
1269 	 * Defer TDT updating until enough descriptors are set up
1270 	 */
1271 	*idx = i;
1272 
1273 	return 0;
1274 }
1275 
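/*
 * Synchronize the FCTRL unicast/multicast promiscuous bits with
 * IFF_PROMISC, IFF_ALLMULTI and the size of the multicast list.
 */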
1276 static void
1277 ix_set_promisc(struct ix_softc *sc)
1278 {
1279 	struct ifnet *ifp = &sc->arpcom.ac_if;
1280 	uint32_t reg_rctl;
1281 	int mcnt = 0;
1282 
1283 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1284 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1285 	if (ifp->if_flags & IFF_ALLMULTI) {
1286 		mcnt = IX_MAX_MCASTADDR;
1287 	} else {
1288 		struct ifmultiaddr *ifma;
1289 
1290 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1291 			if (ifma->ifma_addr->sa_family != AF_LINK)
1292 				continue;
1293 			if (mcnt == IX_MAX_MCASTADDR)
1294 				break;
1295 			mcnt++;
1296 		}
1297 	}
1298 	if (mcnt < IX_MAX_MCASTADDR)
1299 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1300 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1301 
1302 	if (ifp->if_flags & IFF_PROMISC) {
1303 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1304 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1305 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1306 		reg_rctl |= IXGBE_FCTRL_MPE;
1307 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1308 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1309 	}
1310 }
1311 
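/*
 * Program the multicast filter from the interface's multicast list;
 * fall back to "accept all multicast" when the list overflows the
 * hardware table or IFF_ALLMULTI is set.
 */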
1312 static void
1313 ix_set_multi(struct ix_softc *sc)
1314 {
1315 	struct ifnet *ifp = &sc->arpcom.ac_if;
1316 	struct ifmultiaddr *ifma;
1317 	uint32_t fctrl;
1318 	uint8_t	*mta;
1319 	int mcnt = 0;
1320 
1321 	mta = sc->mta;
1322 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1323 
1324 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1325 		if (ifma->ifma_addr->sa_family != AF_LINK)
1326 			continue;
1327 		if (mcnt == IX_MAX_MCASTADDR)
1328 			break;
1329 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1330 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1331 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1332 		mcnt++;
1333 	}
1334 
1335 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1336 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1337 	if (ifp->if_flags & IFF_PROMISC) {
1338 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1339 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1340 		fctrl |= IXGBE_FCTRL_MPE;
1341 		fctrl &= ~IXGBE_FCTRL_UPE;
1342 	} else {
1343 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1344 	}
1345 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1346 
1347 	if (mcnt < IX_MAX_MCASTADDR) {
1348 		ixgbe_update_mc_addr_list(&sc->hw,
1349 		    mta, mcnt, ix_mc_array_itr, TRUE);
1350 	}
1351 }
1352 
1353 /*
1354  * This is an iterator function needed by the multicast
1355  * shared code.  It simply feeds the shared code routine the
1356  * addresses from the array built in ix_set_multi(), one by one.
1357  */
1358 static uint8_t *
1359 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1360 {
1361 	uint8_t *addr = *update_ptr;
1362 	uint8_t *newptr;
1363 	*vmdq = 0;
1364 
1365 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1366 	*update_ptr = newptr;
1367 	return addr;
1368 }
1369 
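/*
 * Once-a-second callout: probe for late-inserted SFP+ optics, update
 * the link state and statistics, then reschedule itself.
 */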
1370 static void
1371 ix_timer(void *arg)
1372 {
1373 	struct ix_softc *sc = arg;
1374 
1375 	lwkt_serialize_enter(&sc->main_serialize);
1376 
1377 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1378 		lwkt_serialize_exit(&sc->main_serialize);
1379 		return;
1380 	}
1381 
1382 	/* Check for pluggable optics */
1383 	if (sc->sfp_probe) {
1384 		if (!ix_sfp_probe(sc))
1385 			goto done; /* Nothing to do */
1386 	}
1387 
1388 	ix_update_link_status(sc);
1389 	ix_update_stats(sc);
1390 
1391 done:
1392 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1393 	lwkt_serialize_exit(&sc->main_serialize);
1394 }
1395 
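/*
 * Propagate link state changes to the network stack and re-apply flow
 * control settings when the link comes up.
 */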
1396 static void
1397 ix_update_link_status(struct ix_softc *sc)
1398 {
1399 	struct ifnet *ifp = &sc->arpcom.ac_if;
1400 
1401 	if (sc->link_up) {
1402 		if (sc->link_active == FALSE) {
1403 			if (bootverbose) {
1404 				if_printf(ifp, "Link is up %d Gbps %s\n",
1405 				    sc->link_speed == 128 ? 10 : 1,
1406 				    "Full Duplex");
1407 			}
1408 			sc->link_active = TRUE;
1409 
1410 			/* Update any Flow Control changes */
1411 			ixgbe_fc_enable(&sc->hw);
1412 
1413 			ifp->if_link_state = LINK_STATE_UP;
1414 			if_link_state_change(ifp);
1415 		}
1416 	} else { /* Link down */
1417 		if (sc->link_active == TRUE) {
1418 			if (bootverbose)
1419 				if_printf(ifp, "Link is Down\n");
1420 			ifp->if_link_state = LINK_STATE_DOWN;
1421 			if_link_state_change(ifp);
1422 
1423 			sc->link_active = FALSE;
1424 		}
1425 	}
1426 }
1427 
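/*
 * Stop the interface: disable interrupts, halt the timer and TX
 * watchdogs, reset and stop the MAC, and free mbufs still held by the
 * TX/RX rings.
 */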
1428 static void
1429 ix_stop(struct ix_softc *sc)
1430 {
1431 	struct ixgbe_hw *hw = &sc->hw;
1432 	struct ifnet *ifp = &sc->arpcom.ac_if;
1433 	int i;
1434 
1435 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1436 
1437 	ix_disable_intr(sc);
1438 	callout_stop(&sc->timer);
1439 
1440 	ifp->if_flags &= ~IFF_RUNNING;
1441 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1442 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1443 
1444 		ifsq_clr_oactive(txr->tx_ifsq);
1445 		ifsq_watchdog_stop(&txr->tx_watchdog);
1446 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1447 	}
1448 
1449 	ixgbe_reset_hw(hw);
1450 	hw->adapter_stopped = FALSE;
1451 	ixgbe_stop_adapter(hw);
1452 	if (hw->mac.type == ixgbe_mac_82599EB)
1453 		ixgbe_stop_mac_link_on_d3_82599(hw);
1454 	/* Turn off the laser - noop with no optics */
1455 	ixgbe_disable_tx_laser(hw);
1456 
1457 	/* Update the stack */
1458 	sc->link_up = FALSE;
1459 	ix_update_link_status(sc);
1460 
1461 	/* Reprogram the RAR[0] in case user changed it. */
1462 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1463 
1464 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1465 		ix_free_tx_ring(&sc->tx_rings[i]);
1466 
1467 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1468 		ix_free_rx_ring(&sc->rx_rings[i]);
1469 }
1470 
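/*
 * Derive the ifmedia subtype (sc->optics) from the physical layer
 * reported by the shared code.
 */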
1471 static void
1472 ix_setup_optics(struct ix_softc *sc)
1473 {
1474 	struct ixgbe_hw *hw = &sc->hw;
1475 	int layer;
1476 
1477 	layer = ixgbe_get_supported_physical_layer(hw);
1478 
1479 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1480 		sc->optics = IFM_10G_T;
1481 		return;
1482 	}
1483 
1484 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1485 		sc->optics = IFM_1000_T;
1486 		return;
1487 	}
1488 
1489 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1490 		sc->optics = IFM_1000_SX;
1491 		return;
1492 	}
1493 
1494 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1495 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1496 		sc->optics = IFM_10G_LR;
1497 		return;
1498 	}
1499 
1500 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1501 		sc->optics = IFM_10G_SR;
1502 		return;
1503 	}
1504 
1505 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1506 		sc->optics = IFM_10G_TWINAX;
1507 		return;
1508 	}
1509 
1510 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1511 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1512 		sc->optics = IFM_10G_CX4;
1513 		return;
1514 	}
1515 
1516 	/*
1517 	 * If we get here just set the default.
1518 	 * XXX this probably is wrong.
1519 	 */
1520 	sc->optics = IFM_AUTO;
1521 }
1522 
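/*
 * Initialize the ifnet: entry points, capabilities, TX subqueues and
 * watchdogs, and the supported media types; attaches the interface to
 * the ethernet layer.
 */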
1523 static void
1524 ix_setup_ifp(struct ix_softc *sc)
1525 {
1526 	struct ixgbe_hw *hw = &sc->hw;
1527 	struct ifnet *ifp = &sc->arpcom.ac_if;
1528 	int i;
1529 
1530 	ifp->if_baudrate = IF_Gbps(10UL);
1531 
1532 	ifp->if_softc = sc;
1533 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1534 	ifp->if_init = ix_init;
1535 	ifp->if_ioctl = ix_ioctl;
1536 	ifp->if_start = ix_start;
1537 	ifp->if_serialize = ix_serialize;
1538 	ifp->if_deserialize = ix_deserialize;
1539 	ifp->if_tryserialize = ix_tryserialize;
1540 #ifdef INVARIANTS
1541 	ifp->if_serialize_assert = ix_serialize_assert;
1542 #endif
1543 #ifdef IFPOLL_ENABLE
1544 	ifp->if_npoll = ix_npoll;
1545 #endif
1546 
1547 	/* Increase TSO burst length */
1548 	ifp->if_tsolen = (8 * ETHERMTU);
1549 
1550 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1551 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1552 
1553 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1554 	ifq_set_ready(&ifp->if_snd);
1555 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1556 
1557 	ifp->if_mapsubq = ifq_mapsubq_mask;
1558 	ifq_set_subq_mask(&ifp->if_snd, 0);
1559 
1560 	ether_ifattach(ifp, hw->mac.addr, NULL);
1561 
1562 	ifp->if_capabilities =
1563 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1564 	if (IX_ENABLE_HWRSS(sc))
1565 		ifp->if_capabilities |= IFCAP_RSS;
1566 	ifp->if_capenable = ifp->if_capabilities;
1567 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1568 
1569 	/*
1570 	 * Tell the upper layer(s) we support long frames.
1571 	 */
1572 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1573 
1574 	/* Setup TX rings and subqueues */
1575 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1576 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1577 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1578 
1579 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1580 		ifsq_set_priv(ifsq, txr);
1581 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1582 		txr->tx_ifsq = ifsq;
1583 
1584 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1585 	}
1586 
1587 	/*
1588 	 * Specify the media types supported by this adapter and register
1589 	 * callbacks to update media and link information
1590 	 */
1591 	ifmedia_add(&sc->media, IFM_ETHER | sc->optics | IFM_FDX, 0, NULL);
1592 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1593 		if (sc->optics != IFM_1000_T) {
1594 			ifmedia_add(&sc->media,
1595 			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1596 		}
1597 	}
1598 	if (sc->optics != IFM_AUTO)
1599 		ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1600 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1601 }
1602 
1603 static boolean_t
1604 ix_is_sfp(const struct ixgbe_hw *hw)
1605 {
1606 	switch (hw->phy.type) {
1607 	case ixgbe_phy_sfp_avago:
1608 	case ixgbe_phy_sfp_ftl:
1609 	case ixgbe_phy_sfp_intel:
1610 	case ixgbe_phy_sfp_unknown:
1611 	case ixgbe_phy_sfp_passive_tyco:
1612 	case ixgbe_phy_sfp_passive_unknown:
1613 		return TRUE;
1614 	default:
1615 		return FALSE;
1616 	}
1617 }
1618 
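/*
 * Bring up the link: kick SFP+ module/multispeed-fiber handling when
 * optics are present, otherwise negotiate and set up the link through
 * the shared-code MAC ops.
 */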
1619 static void
1620 ix_config_link(struct ix_softc *sc)
1621 {
1622 	struct ixgbe_hw *hw = &sc->hw;
1623 	boolean_t sfp;
1624 
1625 	sfp = ix_is_sfp(hw);
1626 	if (sfp) {
1627 		if (hw->phy.multispeed_fiber) {
1628 			hw->mac.ops.setup_sfp(hw);
1629 			ixgbe_enable_tx_laser(hw);
1630 			ix_handle_msf(sc);
1631 		} else {
1632 			ix_handle_mod(sc);
1633 		}
1634 	} else {
1635 		uint32_t autoneg, err = 0;
1636 
1637 		if (hw->mac.ops.check_link != NULL) {
1638 			err = ixgbe_check_link(hw, &sc->link_speed,
1639 			    &sc->link_up, FALSE);
1640 			if (err)
1641 				return;
1642 		}
1643 
1644 		autoneg = hw->phy.autoneg_advertised;
1645 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1646 			bool negotiate;
1647 
1648 			err = hw->mac.ops.get_link_capabilities(hw,
1649 			    &autoneg, &negotiate);
1650 			if (err)
1651 				return;
1652 		}
1653 
1654 		if (hw->mac.ops.setup_link != NULL) {
1655 			err = hw->mac.ops.setup_link(hw,
1656 			    autoneg, sc->link_up);
1657 			if (err)
1658 				return;
1659 		}
1660 	}
1661 }
1662 
1663 static int
1664 ix_alloc_rings(struct ix_softc *sc)
1665 {
1666 	int error, i;
1667 
1668 	/*
1669 	 * Create top level busdma tag
1670 	 */
1671 	error = bus_dma_tag_create(NULL, 1, 0,
1672 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1673 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1674 	    &sc->parent_tag);
1675 	if (error) {
1676 		device_printf(sc->dev, "could not create top level DMA tag\n");
1677 		return error;
1678 	}
1679 
1680 	/*
1681 	 * Allocate TX descriptor rings and buffers
1682 	 */
1683 	sc->tx_rings = kmalloc_cachealign(
1684 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1685 	    M_DEVBUF, M_WAITOK | M_ZERO);
1686 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1687 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1688 
1689 		txr->tx_sc = sc;
1690 		txr->tx_idx = i;
1691 		txr->tx_intr_vec = -1;
1692 		lwkt_serialize_init(&txr->tx_serialize);
1693 
1694 		error = ix_create_tx_ring(txr);
1695 		if (error)
1696 			return error;
1697 	}
1698 
1699 	/*
1700 	 * Allocate RX descriptor rings and buffers
1701 	 */
1702 	sc->rx_rings = kmalloc_cachealign(
1703 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1704 	    M_DEVBUF, M_WAITOK | M_ZERO);
1705 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1706 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1707 
1708 		rxr->rx_sc = sc;
1709 		rxr->rx_idx = i;
1710 		rxr->rx_intr_vec = -1;
1711 		lwkt_serialize_init(&rxr->rx_serialize);
1712 
1713 		error = ix_create_rx_ring(rxr);
1714 		if (error)
1715 			return error;
1716 	}
1717 
1718 	return 0;
1719 }
1720 
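/*
 * Allocate one TX ring: the descriptor ring, the head write-back
 * buffer, the TX buffer array and the per-buffer DMA maps.
 */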
1721 static int
1722 ix_create_tx_ring(struct ix_tx_ring *txr)
1723 {
1724 	int error, i, tsize, ntxd;
1725 
1726 	/*
1727 	 * Validate number of transmit descriptors.  It must not exceed
1728 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
1729 	 */
1730 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1731 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1732 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1733 		device_printf(txr->tx_sc->dev,
1734 		    "Using %d TX descriptors instead of %d!\n",
1735 		    IX_DEF_TXD, ntxd);
1736 		txr->tx_ndesc = IX_DEF_TXD;
1737 	} else {
1738 		txr->tx_ndesc = ntxd;
1739 	}
1740 
1741 	/*
1742 	 * Allocate TX head write-back buffer
1743 	 */
1744 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1745 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1746 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1747 	if (txr->tx_hdr == NULL) {
1748 		device_printf(txr->tx_sc->dev,
1749 		    "Unable to allocate TX head write-back buffer\n");
1750 		return ENOMEM;
1751 	}
1752 
1753 	/*
1754 	 * Allocate TX descriptor ring
1755 	 */
1756 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1757 	    IX_DBA_ALIGN);
1758 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1759 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1760 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1761 	if (txr->tx_base == NULL) {
1762 		device_printf(txr->tx_sc->dev,
1763 		    "Unable to allocate TX Descriptor memory\n");
1764 		return ENOMEM;
1765 	}
1766 
1767 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1768 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1769 
1770 	/*
1771 	 * Create DMA tag for TX buffers
1772 	 */
1773 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1774 	    1, 0,		/* alignment, bounds */
1775 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1776 	    BUS_SPACE_MAXADDR,	/* highaddr */
1777 	    NULL, NULL,		/* filter, filterarg */
1778 	    IX_TSO_SIZE,	/* maxsize */
1779 	    IX_MAX_SCATTER,	/* nsegments */
1780 	    PAGE_SIZE,		/* maxsegsize */
1781 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1782 	    BUS_DMA_ONEBPAGE,	/* flags */
1783 	    &txr->tx_tag);
1784 	if (error) {
1785 		device_printf(txr->tx_sc->dev,
1786 		    "Unable to allocate TX DMA tag\n");
1787 		kfree(txr->tx_buf, M_DEVBUF);
1788 		txr->tx_buf = NULL;
1789 		return error;
1790 	}
1791 
1792 	/*
1793 	 * Create DMA maps for TX buffers
1794 	 */
1795 	for (i = 0; i < txr->tx_ndesc; ++i) {
1796 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1797 
1798 		error = bus_dmamap_create(txr->tx_tag,
1799 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1800 		if (error) {
1801 			device_printf(txr->tx_sc->dev,
1802 			    "Unable to create TX DMA map\n");
1803 			ix_destroy_tx_ring(txr, i);
1804 			return error;
1805 		}
1806 	}
1807 
1808 	/*
1809 	 * Initialize various watermarks
1810 	 */
1811 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1812 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1813 
1814 	return 0;
1815 }
1816 
1817 static void
1818 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1819 {
1820 	int i;
1821 
1822 	if (txr->tx_hdr != NULL) {
1823 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1824 		bus_dmamem_free(txr->tx_hdr_dtag,
1825 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1826 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1827 		txr->tx_hdr = NULL;
1828 	}
1829 
1830 	if (txr->tx_base != NULL) {
1831 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1832 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1833 		    txr->tx_base_map);
1834 		bus_dma_tag_destroy(txr->tx_base_dtag);
1835 		txr->tx_base = NULL;
1836 	}
1837 
1838 	if (txr->tx_buf == NULL)
1839 		return;
1840 
1841 	for (i = 0; i < ndesc; ++i) {
1842 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1843 
1844 		KKASSERT(txbuf->m_head == NULL);
1845 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1846 	}
1847 	bus_dma_tag_destroy(txr->tx_tag);
1848 
1849 	kfree(txr->tx_buf, M_DEVBUF);
1850 	txr->tx_buf = NULL;
1851 }
1852 
1853 static void
1854 ix_init_tx_ring(struct ix_tx_ring *txr)
1855 {
1856 	/* Clear the old ring contents */
1857 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1858 
1859 	/* Clear TX head write-back buffer */
1860 	*(txr->tx_hdr) = 0;
1861 
1862 	/* Reset indices */
1863 	txr->tx_next_avail = 0;
1864 	txr->tx_next_clean = 0;
1865 	txr->tx_nsegs = 0;
1866 
1867 	/* Set number of descriptors available */
1868 	txr->tx_avail = txr->tx_ndesc;
1869 
1870 	/* Enable this TX ring */
1871 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1872 }
1873 
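/*
 * Program the TX units: descriptor ring base/length and head/tail
 * pointers, TX head write-back, and (on 82599/X540) the DMA TX engine
 * and MTQC.
 */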
1874 static void
1875 ix_init_tx_unit(struct ix_softc *sc)
1876 {
1877 	struct ixgbe_hw	*hw = &sc->hw;
1878 	int i;
1879 
1880 	/*
1881 	 * Setup the Base and Length of the Tx Descriptor Ring
1882 	 */
1883 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1884 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1885 		uint64_t tdba = txr->tx_base_paddr;
1886 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1887 		uint32_t txctrl;
1888 
1889 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1890 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1891 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1892 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1893 
1894 		/* Setup the HW Tx Head and Tail descriptor pointers */
1895 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1896 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1897 
1898 		/* Disable TX head write-back relaxed ordering */
1899 		switch (hw->mac.type) {
1900 		case ixgbe_mac_82598EB:
1901 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1902 			break;
1903 		case ixgbe_mac_82599EB:
1904 		case ixgbe_mac_X540:
1905 		default:
1906 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1907 			break;
1908 		}
1909 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1910 		switch (hw->mac.type) {
1911 		case ixgbe_mac_82598EB:
1912 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1913 			break;
1914 		case ixgbe_mac_82599EB:
1915 		case ixgbe_mac_X540:
1916 		default:
1917 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1918 			break;
1919 		}
1920 
1921 		/* Enable TX head write-back */
1922 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1923 		    (uint32_t)(hdr_paddr >> 32));
1924 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1925 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
1926 	}
1927 
1928 	if (hw->mac.type != ixgbe_mac_82598EB) {
1929 		uint32_t dmatxctl, rttdcs;
1930 
1931 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1932 		dmatxctl |= IXGBE_DMATXCTL_TE;
1933 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1934 
1935 		/* Disable arbiter to set MTQC */
1936 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1937 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
1938 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1939 
1940 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1941 
1942 		/* Re-enable arbiter */
1943 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1944 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1945 	}
1946 }
1947 
1948 static int
1949 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1950     uint32_t *cmd_type_len, uint32_t *olinfo_status)
1951 {
1952 	struct ixgbe_adv_tx_context_desc *TXD;
1953 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1954 	int ehdrlen, ip_hlen = 0, ctxd;
1955 	boolean_t offload = TRUE;
1956 
1957 	/* First check if TSO is to be used */
1958 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
1959 		return ix_tso_ctx_setup(txr, mp,
1960 		    cmd_type_len, olinfo_status);
1961 	}
1962 
1963 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
1964 		offload = FALSE;
1965 
1966 	/* Indicate the whole packet as payload when not doing TSO */
1967 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
1968 
1969 	/*
1970 	 * In advanced descriptors the vlan tag must be placed into the
1971 	 * context descriptor.  Hence we need to make one even if not
1972 	 * doing checksum offloads.
1973 	 */
1974 	if (mp->m_flags & M_VLANTAG) {
1975 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1976 		    IXGBE_ADVTXD_VLAN_SHIFT;
1977 	} else if (!offload) {
1978 		/* No TX descriptor is consumed */
1979 		return 0;
1980 	}
1981 
1982 	/* Set the ether header length */
1983 	ehdrlen = mp->m_pkthdr.csum_lhlen;
1984 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1985 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1986 
1987 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
1988 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1989 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1990 		ip_hlen = mp->m_pkthdr.csum_iphlen;
1991 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1992 	}
1993 	vlan_macip_lens |= ip_hlen;
1994 
1995 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1996 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
1997 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1998 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
1999 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2000 
2001 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2002 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2003 
2004 	/* Now ready a context descriptor */
2005 	ctxd = txr->tx_next_avail;
2006 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2007 
2008 	/* Now copy bits into descriptor */
2009 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2010 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2011 	TXD->seqnum_seed = htole32(0);
2012 	TXD->mss_l4len_idx = htole32(0);
2013 
2014 	/* We've consumed the first desc, adjust counters */
2015 	if (++ctxd == txr->tx_ndesc)
2016 		ctxd = 0;
2017 	txr->tx_next_avail = ctxd;
2018 	--txr->tx_avail;
2019 
2020 	/* One TX descriptor is consumed */
2021 	return 1;
2022 }
2023 
2024 static int
2025 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2026     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2027 {
2028 	struct ixgbe_adv_tx_context_desc *TXD;
2029 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2030 	uint32_t mss_l4len_idx = 0, paylen;
2031 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2032 
2033 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2034 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2035 
2036 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2037 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2038 
2039 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2040 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2041 
2042 	ctxd = txr->tx_next_avail;
2043 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2044 
2045 	if (mp->m_flags & M_VLANTAG) {
2046 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2047 		    IXGBE_ADVTXD_VLAN_SHIFT;
2048 	}
2049 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2050 	vlan_macip_lens |= ip_hlen;
2051 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2052 
2053 	/* ADV DTYPE TUCMD */
2054 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2055 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2056 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2057 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2058 
2059 	/* MSS L4LEN IDX */
2060 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2061 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2062 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2063 
2064 	TXD->seqnum_seed = htole32(0);
2065 
2066 	if (++ctxd == txr->tx_ndesc)
2067 		ctxd = 0;
2068 
2069 	txr->tx_avail--;
2070 	txr->tx_next_avail = ctxd;
2071 
2072 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2073 
2074 	/* This is used in the transmit desc in encap */
2075 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2076 
2077 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2078 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2079 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2080 
2081 	/* One TX descriptor is consumed */
2082 	return 1;
2083 }
2084 
2085 static void
2086 ix_txeof(struct ix_tx_ring *txr, int hdr)
2087 {
2088 	int first, avail;
2089 
2090 	if (txr->tx_avail == txr->tx_ndesc)
2091 		return;
2092 
2093 	first = txr->tx_next_clean;
2094 	if (first == hdr)
2095 		return;
2096 
2097 	avail = txr->tx_avail;
2098 	while (first != hdr) {
2099 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2100 
2101 		++avail;
2102 		if (txbuf->m_head) {
2103 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2104 			m_freem(txbuf->m_head);
2105 			txbuf->m_head = NULL;
2106 		}
2107 		if (++first == txr->tx_ndesc)
2108 			first = 0;
2109 	}
2110 	txr->tx_next_clean = first;
2111 	txr->tx_avail = avail;
2112 
2113 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2114 		ifsq_clr_oactive(txr->tx_ifsq);
2115 		txr->tx_watchdog.wd_timer = 0;
2116 	}
2117 }
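/*
 * Illustrative sketch, not part of the driver: ix_txeof() walks from
 * tx_next_clean up to 'hdr', presumably the head index the hardware
 * reported through the write-back buffer programmed in ix_init_tx_unit().
 * The number of descriptors reclaimed per call is the ring-wrapped
 * distance between the two indices; the helper name is hypothetical.
 */
#if 0
static int
ix_example_txeof_count(int next_clean, int hdr, int ndesc)
{
	/* e.g. next_clean=1020, hdr=4, ndesc=1024 -> 8 descriptors done */
	return (hdr - next_clean + ndesc) % ndesc;
}
#endif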
2118 
2119 static int
2120 ix_create_rx_ring(struct ix_rx_ring *rxr)
2121 {
2122 	int i, rsize, error, nrxd;
2123 
2124 	/*
2125 	 * Validate the number of receive descriptors.  It must not exceed
2126 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
2127 	 */
2128 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2129 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2130 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2131 		device_printf(rxr->rx_sc->dev,
2132 		    "Using %d RX descriptors instead of %d!\n",
2133 		    IX_DEF_RXD, nrxd);
2134 		rxr->rx_ndesc = IX_DEF_RXD;
2135 	} else {
2136 		rxr->rx_ndesc = nrxd;
2137 	}
2138 
2139 	/*
2140 	 * Allocate RX descriptor ring
2141 	 */
2142 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2143 	    IX_DBA_ALIGN);
2144 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2145 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2146 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2147 	if (rxr->rx_base == NULL) {
2148 		device_printf(rxr->rx_sc->dev,
2149 		    "Unable to allocate RX Descriptor memory\n");
2150 		return ENOMEM;
2151 	}
2152 
2153 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2154 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2155 
2156 	/*
2157 	 * Create DMA tag for RX buffers
2158 	 */
2159 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2160 	    1, 0,		/* alignment, bounds */
2161 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2162 	    BUS_SPACE_MAXADDR,	/* highaddr */
2163 	    NULL, NULL,		/* filter, filterarg */
2164 	    PAGE_SIZE,		/* maxsize */
2165 	    1,			/* nsegments */
2166 	    PAGE_SIZE,		/* maxsegsize */
2167 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2168 	    &rxr->rx_tag);
2169 	if (error) {
2170 		device_printf(rxr->rx_sc->dev,
2171 		    "Unable to create RX DMA tag\n");
2172 		kfree(rxr->rx_buf, M_DEVBUF);
2173 		rxr->rx_buf = NULL;
2174 		return error;
2175 	}
2176 
2177 	/*
2178 	 * Create spare DMA map for RX buffers
2179 	 */
2180 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2181 	    &rxr->rx_sparemap);
2182 	if (error) {
2183 		device_printf(rxr->rx_sc->dev,
2184 		    "Unable to create spare RX DMA map\n");
2185 		bus_dma_tag_destroy(rxr->rx_tag);
2186 		kfree(rxr->rx_buf, M_DEVBUF);
2187 		rxr->rx_buf = NULL;
2188 		return error;
2189 	}
2190 
2191 	/*
2192 	 * Create DMA maps for RX buffers
2193 	 */
2194 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2195 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2196 
2197 		error = bus_dmamap_create(rxr->rx_tag,
2198 		    BUS_DMA_WAITOK, &rxbuf->map);
2199 		if (error) {
2200 			device_printf(rxr->rx_sc->dev,
2201 			    "Unable to create RX DMA map\n");
2202 			ix_destroy_rx_ring(rxr, i);
2203 			return error;
2204 		}
2205 	}
2206 
2207 	/*
2208 	 * Initialize various watermarks
2209 	 */
2210 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2211 
2212 	return 0;
2213 }
2214 
2215 static void
2216 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2217 {
2218 	int i;
2219 
2220 	if (rxr->rx_base != NULL) {
2221 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2222 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2223 		    rxr->rx_base_map);
2224 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2225 		rxr->rx_base = NULL;
2226 	}
2227 
2228 	if (rxr->rx_buf == NULL)
2229 		return;
2230 
2231 	for (i = 0; i < ndesc; ++i) {
2232 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2233 
2234 		KKASSERT(rxbuf->m_head == NULL);
2235 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2236 	}
2237 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2238 	bus_dma_tag_destroy(rxr->rx_tag);
2239 
2240 	kfree(rxr->rx_buf, M_DEVBUF);
2241 	rxr->rx_buf = NULL;
2242 }
2243 
2244 /*
2245 ** Used to detect a descriptor that has
2246 ** been merged by Hardware RSC.
2247 */
2248 static __inline uint32_t
2249 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2250 {
2251 	return (le32toh(rx->wb.lower.lo_dword.data) &
2252 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2253 }
2254 
2255 #if 0
2256 /*********************************************************************
2257  *
2258  *  Initialize Hardware RSC (LRO) feature on 82599
2259  *  for an RX ring; this is toggled by the LRO capability
2260  *  even though it is transparent to the stack.
2261  *
2262  *  NOTE: since this HW feature only works with IPV4 and
2263  *        our testing has shown soft LRO to be as effective,
2264  *        I have decided to disable this by default.
2265  *
2266  **********************************************************************/
2267 static void
2268 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2269 {
2270 	struct ix_softc *sc = rxr->rx_sc;
2271 	struct ixgbe_hw *hw = &sc->hw;
2272 	uint32_t rscctrl, rdrxctl;
2273 
2274 #if 0
2275 	/* If turning LRO/RSC off we need to disable it */
2276 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2277 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2278 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2279 		return;
2280 	}
2281 #endif
2282 
2283 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2284 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2285 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2286 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2287 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2288 
2289 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2290 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2291 	/*
2292 	** Limit the total number of descriptors that
2293 	** can be combined, so it does not exceed 64K
2294 	*/
2295 	if (rxr->mbuf_sz == MCLBYTES)
2296 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2297 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2299 	else if (rxr->mbuf_sz == MJUM9BYTES)
2300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2301 	else  /* Using 16K cluster */
2302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2303 
2304 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2305 
2306 	/* Enable TCP header recognition */
2307 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2308 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2309 	    IXGBE_PSRTYPE_TCPHDR));
2310 
2311 	/* Disable RSC for ACK packets */
2312 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2313 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2314 
2315 	rxr->hw_rsc = TRUE;
2316 }
2317 #endif
2318 
2319 static int
2320 ix_init_rx_ring(struct ix_rx_ring *rxr)
2321 {
2322 	int i;
2323 
2324 	/* Clear the ring contents */
2325 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2326 
2327 	/* XXX we need JUMPAGESIZE for RSC too */
2328 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2329 		rxr->rx_mbuf_sz = MCLBYTES;
2330 	else
2331 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2332 
2333 	/* Now replenish the mbufs */
2334 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2335 		int error;
2336 
2337 		error = ix_newbuf(rxr, i, TRUE);
2338 		if (error)
2339 			return error;
2340 	}
2341 
2342 	/* Setup our descriptor indices */
2343 	rxr->rx_next_check = 0;
2344 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2345 
2346 #if 0
2347 	/*
2348 	** Now set up the LRO interface:
2349 	*/
2350 	if (ixgbe_rsc_enable)
2351 		ix_setup_hw_rsc(rxr);
2352 #endif
2353 
2354 	return 0;
2355 }
2356 
2357 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2358 
2359 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
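/*
 * Illustrative sketch, not part of the driver: the add-then-shift with
 * BSIZEPKT_ROUNDUP used in ix_init_rx_unit() below is the usual
 * round-up-and-scale idiom; it expresses the RX buffer size in units of
 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes, rounding any remainder up.
 * The helper name is hypothetical.
 */
#if 0
static uint32_t
ix_example_srrctl_bsizepkt(uint32_t mbuf_sz)
{
	/* Equivalent to (mbuf_sz + BSIZEPKT_ROUNDUP) >> BSIZEPKT_SHIFT */
	return howmany(mbuf_sz, 1U << IXGBE_SRRCTL_BSIZEPKT_SHIFT);
}
#endif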
2360 
2361 static void
2362 ix_init_rx_unit(struct ix_softc *sc)
2363 {
2364 	struct ixgbe_hw	*hw = &sc->hw;
2365 	struct ifnet *ifp = &sc->arpcom.ac_if;
2366 	uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2367 	int i;
2368 
2369 	/*
2370 	 * Make sure receives are disabled while setting up the descriptor ring
2371 	 */
2372 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2373 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2374 
2375 	/* Enable broadcasts */
2376 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2377 	fctrl |= IXGBE_FCTRL_BAM;
2378 	fctrl |= IXGBE_FCTRL_DPF;
2379 	fctrl |= IXGBE_FCTRL_PMCF;
2380 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2381 
2382 	/* Set for Jumbo Frames? */
2383 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2384 	if (ifp->if_mtu > ETHERMTU)
2385 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2386 	else
2387 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2388 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2389 
2390 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2391 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2392 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2393 
2394 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2395 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2396 		uint64_t rdba = rxr->rx_base_paddr;
2397 		uint32_t srrctl;
2398 
2399 		/* Setup the Base and Length of the Rx Descriptor Ring */
2400 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2401 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2402 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2403 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2404 
2405 		/*
2406 		 * Set up the SRRCTL register
2407 		 */
2408 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2409 
2410 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2411 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2412 		srrctl |= bufsz;
2413 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2414 		if (sc->rx_ring_inuse > 1) {
2415 			/* See the comment near ix_enable_rx_drop() */
2416 			switch (sc->fc) {
2417 			case ixgbe_fc_rx_pause:
2418 			case ixgbe_fc_tx_pause:
2419 			case ixgbe_fc_full:
2420 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2421 				if (i == 0 && bootverbose) {
2422 					if_printf(ifp, "flow control %d, "
2423 					    "disable RX drop\n", sc->fc);
2424 				}
2425 				break;
2426 
2427 			case ixgbe_fc_none:
2428 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2429 				if (i == 0 && bootverbose) {
2430 					if_printf(ifp, "flow control %d, "
2431 					    "enable RX drop\n", sc->fc);
2432 				}
2433 				break;
2434 
2435 			default:
2436 				break;
2437 			}
2438 		}
2439 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2440 
2441 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2442 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2443 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2444 	}
2445 
2446 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2447 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2448 
2449 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2450 
2451 	/*
2452 	 * Setup RSS
2453 	 */
2454 	if (IX_ENABLE_HWRSS(sc)) {
2455 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2456 		int j, r;
2457 
2458 		/*
2459 		 * NOTE:
2460 		 * When we reach here, RSS has already been disabled
2461 		 * in ix_stop(), so we can safely configure the RSS key
2462 		 * and redirect table.
2463 		 */
2464 
2465 		/*
2466 		 * Configure RSS key
2467 		 */
2468 		toeplitz_get_key(key, sizeof(key));
2469 		for (i = 0; i < IX_NRSSRK; ++i) {
2470 			uint32_t rssrk;
2471 
2472 			rssrk = IX_RSSRK_VAL(key, i);
2473 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2474 			    i, rssrk);
2475 
2476 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2477 		}
2478 
2479 		/*
2480 		 * Configure RSS redirect table in the following fashion:
2481 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2482 		 */
2483 		r = 0;
2484 		for (j = 0; j < IX_NRETA; ++j) {
2485 			uint32_t reta = 0;
2486 
2487 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2488 				uint32_t q;
2489 
2490 				q = r % sc->rx_ring_inuse;
2491 				reta |= q << (8 * i);
2492 				++r;
2493 			}
2494 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2495 			IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2496 		}
2497 
2498 		/*
2499 		 * Enable multiple receive queues.
2500 		 * Enable IPv4 RSS standard hash functions.
2501 		 */
2502 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2503 		    IXGBE_MRQC_RSSEN |
2504 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2505 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2506 
2507 		/*
2508 		 * NOTE:
2509 		 * PCSD must be enabled to enable multiple
2510 		 * receive queues.
2511 		 */
2512 		rxcsum |= IXGBE_RXCSUM_PCSD;
2513 	}
2514 
2515 	if (ifp->if_capenable & IFCAP_RXCSUM)
2516 		rxcsum |= IXGBE_RXCSUM_PCSD;
2517 
2518 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2519 }
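/*
 * Illustrative sketch, not part of the driver: each 32-bit RETA register
 * written above packs four one-byte queue indices (matching the 8 * i
 * shifts), filled round-robin over rx_ring_inuse, so a masked hash value
 * indexes a byte whose value is the target RX ring.  The helper name is
 * hypothetical.
 */
#if 0
static uint32_t
ix_example_reta_word(int first_entry, int nrings)
{
	uint32_t reta = 0;
	int i;

	/* Same packing as the redirect table loop in ix_init_rx_unit() */
	for (i = 0; i < 4; ++i)
		reta |= (uint32_t)((first_entry + i) % nrings) << (8 * i);
	return reta;	/* e.g. first_entry=0, nrings=2 -> 0x01000100 */
}
#endif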
2520 
2521 static __inline void
2522 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2523 {
2524 	if (--i < 0)
2525 		i = rxr->rx_ndesc - 1;
2526 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2527 }
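/*
 * Illustrative sketch, not part of the driver: ix_rx_refresh() above
 * moves the RX tail (RDT) to one slot behind the next descriptor to be
 * checked, wrapping at the ring end.  The helper name is hypothetical.
 */
#if 0
static __inline int
ix_example_rdt_index(int next_check, int ndesc)
{
	return (next_check == 0) ? ndesc - 1 : next_check - 1;
}
#endif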
2528 
2529 static __inline void
2530 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2531 {
2532 	if ((ptype &
2533 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2534 		/* Not IPv4 */
2535 		return;
2536 	}
2537 
2538 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2539 	    IXGBE_RXD_STAT_IPCS)
2540 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2541 
2542 	if ((ptype &
2543 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2544 		/*
2545 		 * - Neither TCP nor UDP
2546 		 * - IPv4 fragment
2547 		 */
2548 		return;
2549 	}
2550 
2551 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2552 	    IXGBE_RXD_STAT_L4CS) {
2553 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2554 		    CSUM_FRAG_NOT_CHECKED;
2555 		mp->m_pkthdr.csum_data = htons(0xffff);
2556 	}
2557 }
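/*
 * Illustrative sketch, not part of the driver: the checksum tests above
 * use the "(status & (done | error)) == done" pattern, i.e. the checksum
 * is accepted only when the hardware says it was computed (IPCS/L4CS)
 * and the matching error bit (IPE/TCPE) is clear.  The helper name and
 * parameters are hypothetical.
 */
#if 0
static __inline boolean_t
ix_example_csum_ok(uint32_t staterr, uint32_t done_bit, uint32_t err_bit)
{
	return ((staterr & (done_bit | err_bit)) == done_bit);
}
#endif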
2558 
2559 static __inline struct pktinfo *
2560 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2561     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2562 {
2563 	switch (hashtype) {
2564 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2565 		pi->pi_netisr = NETISR_IP;
2566 		pi->pi_flags = 0;
2567 		pi->pi_l3proto = IPPROTO_TCP;
2568 		break;
2569 
2570 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2571 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2572 			/* Not UDP, or is a fragment */
2573 			return NULL;
2574 		}
2575 		pi->pi_netisr = NETISR_IP;
2576 		pi->pi_flags = 0;
2577 		pi->pi_l3proto = IPPROTO_UDP;
2578 		break;
2579 
2580 	default:
2581 		return NULL;
2582 	}
2583 
2584 	m->m_flags |= M_HASH;
2585 	m->m_pkthdr.hash = toeplitz_hash(hash);
2586 	return pi;
2587 }
2588 
2589 static __inline void
2590 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2591 {
2592 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2593 	rxd->wb.upper.status_error = 0;
2594 }
2595 
2596 static void
2597 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2598 {
2599 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2600 
2601 	/*
2602 	 * XXX discard may not be correct
2603 	 */
2604 	if (eop) {
2605 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2606 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2607 	} else {
2608 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2609 	}
2610 	if (rxbuf->fmp != NULL) {
2611 		m_freem(rxbuf->fmp);
2612 		rxbuf->fmp = NULL;
2613 		rxbuf->lmp = NULL;
2614 	}
2615 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2616 }
2617 
2618 static void
2619 ix_rxeof(struct ix_rx_ring *rxr, int count)
2620 {
2621 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2622 	int i, nsegs = 0, cpuid = mycpuid;
2623 
2624 	i = rxr->rx_next_check;
2625 	while (count != 0) {
2626 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2627 		union ixgbe_adv_rx_desc	*cur;
2628 		struct mbuf *sendmp = NULL, *mp;
2629 		struct pktinfo *pi = NULL, pi0;
2630 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2631 		uint16_t len;
2632 		boolean_t eop;
2633 
2634 		cur = &rxr->rx_base[i];
2635 		staterr = le32toh(cur->wb.upper.status_error);
2636 
2637 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2638 			break;
2639 		++nsegs;
2640 
2641 		rxbuf = &rxr->rx_buf[i];
2642 		mp = rxbuf->m_head;
2643 
2644 		len = le16toh(cur->wb.upper.length);
2645 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2646 		    IXGBE_RXDADV_PKTTYPE_MASK;
2647 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2648 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2649 		    IXGBE_RXDADV_RSSTYPE_MASK;
2650 
2651 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2652 		if (eop)
2653 			--count;
2654 
2655 		/*
2656 		 * Make sure bad packets are discarded
2657 		 */
2658 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2659 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2660 			ix_rx_discard(rxr, i, eop);
2661 			goto next_desc;
2662 		}
2663 
2664 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2665 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2666 			ix_rx_discard(rxr, i, eop);
2667 			goto next_desc;
2668 		}
2669 
2670 		/*
2671 		 * On the 82599, which supports hardware LRO (RSC), the
2672 		 * fragments of a frame need not occupy sequential
2673 		 * descriptors; instead, the next descriptor of the frame
2674 		 * is indicated in bits of the current one.  This also
2675 		 * means that we may be processing more than one frame at
2676 		 * a time, which is why the global fmp/lmp chain pointers
2677 		 * were replaced by the per-buffer chaining scheme used
2678 		 * here.
2679 		 */
2680 		if (!eop) {
2681 			int nextp;
2682 
2683 			/*
2684 			 * Figure out the next descriptor
2685 			 * of this frame.
2686 			 */
2687 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2688 				rsc = ix_rsc_count(cur);
2689 			if (rsc) { /* Get hardware index */
2690 				nextp = ((staterr &
2691 				    IXGBE_RXDADV_NEXTP_MASK) >>
2692 				    IXGBE_RXDADV_NEXTP_SHIFT);
2693 			} else { /* Just sequential */
2694 				nextp = i + 1;
2695 				if (nextp == rxr->rx_ndesc)
2696 					nextp = 0;
2697 			}
2698 			nbuf = &rxr->rx_buf[nextp];
2699 			prefetch(nbuf);
2700 		}
2701 		mp->m_len = len;
2702 
2703 		/*
2704 		 * Rather than using the fmp/lmp global pointers
2705 		 * we now keep the head of a packet chain in the
2706 		 * buffer struct and pass this along from one
2707 		 * descriptor to the next, until we get EOP.
2708 		 */
2709 		if (rxbuf->fmp == NULL) {
2710 			mp->m_pkthdr.len = len;
2711 			rxbuf->fmp = mp;
2712 			rxbuf->lmp = mp;
2713 		} else {
2714 			rxbuf->fmp->m_pkthdr.len += len;
2715 			rxbuf->lmp->m_next = mp;
2716 			rxbuf->lmp = mp;
2717 		}
2718 
2719 		if (nbuf != NULL) {
2720 			/*
2721 			 * Not the last fragment of this frame,
2722 			 * pass this fragment list on
2723 			 */
2724 			nbuf->fmp = rxbuf->fmp;
2725 			nbuf->lmp = rxbuf->lmp;
2726 		} else {
2727 			/*
2728 			 * Send this frame
2729 			 */
2730 			sendmp = rxbuf->fmp;
2731 
2732 			sendmp->m_pkthdr.rcvif = ifp;
2733 			IFNET_STAT_INC(ifp, ipackets, 1);
2734 #ifdef IX_RSS_DEBUG
2735 			rxr->rx_pkts++;
2736 #endif
2737 
2738 			/* Process vlan info */
2739 			if (staterr & IXGBE_RXD_STAT_VP) {
2740 				sendmp->m_pkthdr.ether_vlantag =
2741 				    le16toh(cur->wb.upper.vlan);
2742 				sendmp->m_flags |= M_VLANTAG;
2743 			}
2744 			if (ifp->if_capenable & IFCAP_RXCSUM)
2745 				ix_rxcsum(staterr, sendmp, ptype);
2746 			if (ifp->if_capenable & IFCAP_RSS) {
2747 				pi = ix_rssinfo(sendmp, &pi0,
2748 				    hash, hashtype, ptype);
2749 			}
2750 		}
2751 		rxbuf->fmp = NULL;
2752 		rxbuf->lmp = NULL;
2753 next_desc:
2754 		/* Advance our pointers to the next descriptor. */
2755 		if (++i == rxr->rx_ndesc)
2756 			i = 0;
2757 
2758 		if (sendmp != NULL)
2759 			ifp->if_input(ifp, sendmp, pi, cpuid);
2760 
2761 		if (nsegs >= rxr->rx_wreg_nsegs) {
2762 			ix_rx_refresh(rxr, i);
2763 			nsegs = 0;
2764 		}
2765 	}
2766 	rxr->rx_next_check = i;
2767 
2768 	if (nsegs > 0)
2769 		ix_rx_refresh(rxr, i);
2770 }
2771 
2772 static void
2773 ix_set_vlan(struct ix_softc *sc)
2774 {
2775 	struct ixgbe_hw *hw = &sc->hw;
2776 	uint32_t ctrl;
2777 
2778 	if (hw->mac.type == ixgbe_mac_82598EB) {
2779 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2780 		ctrl |= IXGBE_VLNCTRL_VME;
2781 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2782 	} else {
2783 		int i;
2784 
2785 		/*
2786 		 * On 82599 and later chips the VLAN enable is
2787 		 * per queue in RXDCTL
2788 		 */
2789 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2790 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2791 			ctrl |= IXGBE_RXDCTL_VME;
2792 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2793 		}
2794 	}
2795 }
2796 
2797 static void
2798 ix_enable_intr(struct ix_softc *sc)
2799 {
2800 	struct ixgbe_hw	*hw = &sc->hw;
2801 	uint32_t fwsm;
2802 	int i;
2803 
2804 	for (i = 0; i < sc->intr_cnt; ++i)
2805 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2806 
2807 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2808 
2809 	/* Enable Fan Failure detection */
2810 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2811 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2812 
2813 	switch (sc->hw.mac.type) {
2814 	case ixgbe_mac_82599EB:
2815 		sc->intr_mask |= IXGBE_EIMS_ECC;
2816 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2817 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2818 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2819 		break;
2820 
2821 	case ixgbe_mac_X540:
2822 		sc->intr_mask |= IXGBE_EIMS_ECC;
2823 		/* Detect if Thermal Sensor is enabled */
2824 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2825 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2826 			sc->intr_mask |= IXGBE_EIMS_TS;
2827 		/* FALL THROUGH */
2828 	default:
2829 		break;
2830 	}
2831 
2832 	/* With MSI-X we use auto clear for RX and TX rings */
2833 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2834 		/*
2835 		 * There are no EIAC1/EIAC2 for newer chips; the related
2836 		 * bits for TX and RX rings > 16 are always auto clear.
2837 		 *
2838 		 * XXX which bits?  There are _no_ documented EICR1 and
2839 		 * EICR2 at all; only EICR.
2840 		 */
2841 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2842 	} else {
2843 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2844 
2845 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2846 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2847 			sc->intr_mask |= IX_RX1_INTR_MASK;
2848 	}
2849 
2850 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2851 
2852 	/*
2853 	 * Enable RX and TX rings for MSI-X
2854 	 */
2855 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2856 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2857 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2858 
2859 			if (txr->tx_intr_vec >= 0) {
2860 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2861 				    txr->tx_eims_val);
2862 			}
2863 		}
2864 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2865 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2866 
2867 			KKASSERT(rxr->rx_intr_vec >= 0);
2868 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2869 		}
2870 	}
2871 
2872 	IXGBE_WRITE_FLUSH(hw);
2873 }
2874 
2875 static void
2876 ix_disable_intr(struct ix_softc *sc)
2877 {
2878 	int i;
2879 
2880 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2881 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2882 
2883 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2884 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2885 	} else {
2886 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2887 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2888 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2889 	}
2890 	IXGBE_WRITE_FLUSH(&sc->hw);
2891 
2892 	for (i = 0; i < sc->intr_cnt; ++i)
2893 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2894 }
2895 
2896 uint16_t
2897 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2898 {
2899 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2900 	    reg, 2);
2901 }
2902 
2903 void
2904 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2905 {
2906 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2907 	    reg, value, 2);
2908 }
2909 
2910 static void
2911 ix_slot_info(struct ix_softc *sc)
2912 {
2913 	struct ixgbe_hw *hw = &sc->hw;
2914 	device_t dev = sc->dev;
2915 	struct ixgbe_mac_info *mac = &hw->mac;
2916 	uint16_t link;
2917 	uint32_t offset;
2918 
2919 	/* For most devices simply call the shared code routine */
2920 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2921 		ixgbe_get_bus_info(hw);
2922 		goto display;
2923 	}
2924 
2925 	/*
2926 	 * For the Quad port adapter we need to parse back
2927 	 * up the PCI tree to find the speed of the expansion
2928 	 * slot into which this adapter is plugged. A bit more work.
2929 	 */
2930 	dev = device_get_parent(device_get_parent(dev));
2931 #ifdef IXGBE_DEBUG
2932 	device_printf(dev, "parent pcib = %x,%x,%x\n",
2933 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2934 #endif
2935 	dev = device_get_parent(device_get_parent(dev));
2936 #ifdef IXGBE_DEBUG
2937 	device_printf(dev, "slot pcib = %x,%x,%x\n",
2938 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2939 #endif
2940 	/* Now get the PCI Express Capabilities offset */
2941 	offset = pci_get_pciecap_ptr(dev);
2942 	/* ...and read the Link Status Register */
2943 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2944 	switch (link & IXGBE_PCI_LINK_WIDTH) {
2945 	case IXGBE_PCI_LINK_WIDTH_1:
2946 		hw->bus.width = ixgbe_bus_width_pcie_x1;
2947 		break;
2948 	case IXGBE_PCI_LINK_WIDTH_2:
2949 		hw->bus.width = ixgbe_bus_width_pcie_x2;
2950 		break;
2951 	case IXGBE_PCI_LINK_WIDTH_4:
2952 		hw->bus.width = ixgbe_bus_width_pcie_x4;
2953 		break;
2954 	case IXGBE_PCI_LINK_WIDTH_8:
2955 		hw->bus.width = ixgbe_bus_width_pcie_x8;
2956 		break;
2957 	default:
2958 		hw->bus.width = ixgbe_bus_width_unknown;
2959 		break;
2960 	}
2961 
2962 	switch (link & IXGBE_PCI_LINK_SPEED) {
2963 	case IXGBE_PCI_LINK_SPEED_2500:
2964 		hw->bus.speed = ixgbe_bus_speed_2500;
2965 		break;
2966 	case IXGBE_PCI_LINK_SPEED_5000:
2967 		hw->bus.speed = ixgbe_bus_speed_5000;
2968 		break;
2969 	case IXGBE_PCI_LINK_SPEED_8000:
2970 		hw->bus.speed = ixgbe_bus_speed_8000;
2971 		break;
2972 	default:
2973 		hw->bus.speed = ixgbe_bus_speed_unknown;
2974 		break;
2975 	}
2976 
2977 	mac->ops.set_lan_id(hw);
2978 
2979 display:
2980 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
2981 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
2982 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
2983 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
2984 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
2985 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
2986 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
2987 
2988 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
2989 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
2990 	    hw->bus.speed == ixgbe_bus_speed_2500) {
2991 		device_printf(dev, "For optimal performance a x8 "
2992 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
2993 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
2994 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
2995 	    hw->bus.speed < ixgbe_bus_speed_8000) {
2996 		device_printf(dev, "For optimal performance a x8 "
2997 		    "PCIE Gen3 slot is required.\n");
2998 	}
2999 }
3000 
3001 /*
3002  * TODO comment is incorrect
3003  *
3004  * Setup the correct IVAR register for a particular MSIX interrupt
3005  * - entry is the register array entry
3006  * - vector is the MSIX vector for this queue
3007  * - type is RX/TX/MISC
3008  */
3009 static void
3010 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3011     int8_t type)
3012 {
3013 	struct ixgbe_hw *hw = &sc->hw;
3014 	uint32_t ivar, index;
3015 
3016 	vector |= IXGBE_IVAR_ALLOC_VAL;
3017 
3018 	switch (hw->mac.type) {
3019 	case ixgbe_mac_82598EB:
3020 		if (type == -1)
3021 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3022 		else
3023 			entry += (type * 64);
3024 		index = (entry >> 2) & 0x1F;
3025 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3026 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3027 		ivar |= (vector << (8 * (entry & 0x3)));
3028 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3029 		break;
3030 
3031 	case ixgbe_mac_82599EB:
3032 	case ixgbe_mac_X540:
3033 		if (type == -1) { /* MISC IVAR */
3034 			index = (entry & 1) * 8;
3035 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3036 			ivar &= ~(0xFF << index);
3037 			ivar |= (vector << index);
3038 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3039 		} else {	/* RX/TX IVARS */
3040 			index = (16 * (entry & 1)) + (8 * type);
3041 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3042 			ivar &= ~(0xFF << index);
3043 			ivar |= (vector << index);
3044 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3045 		}
3046 		break;
3047 	default:
3048 		break;
3049 	}
3050 }
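/*
 * Illustrative sketch, not part of the driver: the 82598 IVAR addressing
 * used above.  Each 32-bit IVAR register holds four 8-bit vector slots,
 * so an entry selects register (entry >> 2) and byte lane (entry & 0x3).
 * The helper name is hypothetical.
 */
#if 0
static void
ix_example_ivar_slot_82598(uint8_t entry, uint32_t *index, uint32_t *shift)
{
	*index = (entry >> 2) & 0x1F;	/* which IXGBE_IVAR(index) register */
	*shift = 8 * (entry & 0x3);	/* which byte lane within it */
}
#endif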
3051 
3052 static boolean_t
3053 ix_sfp_probe(struct ix_softc *sc)
3054 {
3055 	struct ixgbe_hw	*hw = &sc->hw;
3056 
3057 	if (hw->phy.type == ixgbe_phy_nl &&
3058 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3059 		int32_t ret;
3060 
3061 		ret = hw->phy.ops.identify_sfp(hw);
3062 		if (ret)
3063 			return FALSE;
3064 
3065 		ret = hw->phy.ops.reset(hw);
3066 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3067 			if_printf(&sc->arpcom.ac_if,
3068 			     "Unsupported SFP+ module detected!  "
3069 			     "Reload driver with supported module.\n");
3070 			sc->sfp_probe = FALSE;
3071 			return FALSE;
3072 		}
3073 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3074 
3075 		/* We now have supported optics */
3076 		sc->sfp_probe = FALSE;
3077 		/* Set the optics type so system reports correctly */
3078 		ix_setup_optics(sc);
3079 
3080 		return TRUE;
3081 	}
3082 	return FALSE;
3083 }
3084 
3085 static void
3086 ix_handle_link(struct ix_softc *sc)
3087 {
3088 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3089 	ix_update_link_status(sc);
3090 }
3091 
3092 /*
3093  * Handling SFP module
3094  */
3095 static void
3096 ix_handle_mod(struct ix_softc *sc)
3097 {
3098 	struct ixgbe_hw *hw = &sc->hw;
3099 	uint32_t err;
3100 
3101 	err = hw->phy.ops.identify_sfp(hw);
3102 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3103 		if_printf(&sc->arpcom.ac_if,
3104 		    "Unsupported SFP+ module type was detected.\n");
3105 		return;
3106 	}
3107 	err = hw->mac.ops.setup_sfp(hw);
3108 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3109 		if_printf(&sc->arpcom.ac_if,
3110 		    "Setup failure - unsupported SFP+ module type.\n");
3111 		return;
3112 	}
3113 	ix_handle_msf(sc);
3114 }
3115 
3116 /*
3117  * Handling MSF (multispeed fiber)
3118  */
3119 static void
3120 ix_handle_msf(struct ix_softc *sc)
3121 {
3122 	struct ixgbe_hw *hw = &sc->hw;
3123 	uint32_t autoneg;
3124 
3125 	autoneg = hw->phy.autoneg_advertised;
3126 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3127 		bool negotiate;
3128 
3129 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3130 	}
3131 	if (hw->mac.ops.setup_link != NULL)
3132 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3133 }
3134 
3135 static void
3136 ix_update_stats(struct ix_softc *sc)
3137 {
3138 	struct ifnet *ifp = &sc->arpcom.ac_if;
3139 	struct ixgbe_hw *hw = &sc->hw;
3140 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3141 	uint64_t total_missed_rx = 0;
3142 	int i;
3143 
3144 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3145 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3146 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3147 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3148 
3149 	/*
3150 	 * Note: These are for the 8 possible traffic classes, which
3151 	 * are unused in the current implementation, so only index 0
3152 	 * should read real data.
3153 	 */
3154 	for (i = 0; i < 8; i++) {
3155 		uint32_t mp;
3156 
3157 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3158 		/* missed_rx tallies misses for the gprc workaround */
3159 		missed_rx += mp;
3160 		/* global total per queue */
3161 		sc->stats.mpc[i] += mp;
3162 
3163 		/* Running comprehensive total for stats display */
3164 		total_missed_rx += sc->stats.mpc[i];
3165 
3166 		if (hw->mac.type == ixgbe_mac_82598EB) {
3167 			sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3168 			sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3169 			sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3170 			sc->stats.pxonrxc[i] +=
3171 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3172 		} else {
3173 			sc->stats.pxonrxc[i] +=
3174 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3175 		}
3176 		sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3177 		sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3178 		sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3179 		sc->stats.pxon2offc[i] +=
3180 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3181 	}
3182 	for (i = 0; i < 16; i++) {
3183 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3184 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3185 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3186 	}
3187 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3188 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3189 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3190 
3191 	/* Hardware workaround, gprc counts missed packets */
3192 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3193 	sc->stats.gprc -= missed_rx;
3194 
3195 	if (hw->mac.type != ixgbe_mac_82598EB) {
3196 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3197 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3198 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3199 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3200 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3201 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3202 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3203 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3204 	} else {
3205 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3206 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3207 		/* 82598 only has a counter in the high register */
3208 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3209 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3210 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3211 	}
3212 
3213 	/*
3214 	 * Workaround: the mprc hardware counter incorrectly includes
3215 	 * broadcasts, so for now we subtract those.
3216 	 */
3217 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3218 	sc->stats.bprc += bprc;
3219 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3220 	if (hw->mac.type == ixgbe_mac_82598EB)
3221 		sc->stats.mprc -= bprc;
3222 
3223 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3224 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3225 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3226 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3227 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3228 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3229 
3230 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3231 	sc->stats.lxontxc += lxon;
3232 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3233 	sc->stats.lxofftxc += lxoff;
3234 	total = lxon + lxoff;
3235 
3236 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3237 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3238 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3239 	sc->stats.gptc -= total;
3240 	sc->stats.mptc -= total;
3241 	sc->stats.ptc64 -= total;
3242 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3243 
3244 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3245 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3246 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3247 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3248 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3249 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3250 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3251 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3252 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3253 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3254 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3255 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3256 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3257 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3258 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3259 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3260 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3261 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3262 	/* Only read FCOE on 82599 and newer */
3263 	if (hw->mac.type != ixgbe_mac_82598EB) {
3264 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3265 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3266 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3267 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3268 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3269 	}
3270 
3271 	/* Rx Errors */
3272 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3273 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3274 }
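/*
 * Illustrative sketch, not part of the driver: several of the 64-bit
 * octet counters above are assembled from a low/high register pair,
 * low register read first.  The helper name and the register-pair
 * parameters are hypothetical.
 */
#if 0
static __inline uint64_t
ix_example_read_split_counter(struct ixgbe_hw *hw, uint32_t lo_reg,
    uint32_t hi_reg)
{
	return IXGBE_READ_REG(hw, lo_reg) +
	    ((uint64_t)IXGBE_READ_REG(hw, hi_reg) << 32);
}
#endif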
3275 
3276 #if 0
3277 /*
3278  * Add sysctl variables, one per statistic, to the system.
3279  */
3280 static void
3281 ix_add_hw_stats(struct ix_softc *sc)
3282 {
3283 
3284 	device_t dev = sc->dev;
3285 
3286 	struct ix_tx_ring *txr = sc->tx_rings;
3287 	struct ix_rx_ring *rxr = sc->rx_rings;
3288 
3289 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3290 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3291 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3292 	struct ixgbe_hw_stats *stats = &sc->stats;
3293 
3294 	struct sysctl_oid *stat_node, *queue_node;
3295 	struct sysctl_oid_list *stat_list, *queue_list;
3296 
3297 #define QUEUE_NAME_LEN 32
3298 	char namebuf[QUEUE_NAME_LEN];
3299 
3300 	/* MAC stats get their own sub node */
3301 
3302 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3303 				    CTLFLAG_RD, NULL, "MAC Statistics");
3304 	stat_list = SYSCTL_CHILDREN(stat_node);
3305 
3306 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3307 			CTLFLAG_RD, &stats->crcerrs,
3308 			"CRC Errors");
3309 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3310 			CTLFLAG_RD, &stats->illerrc,
3311 			"Illegal Byte Errors");
3312 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3313 			CTLFLAG_RD, &stats->errbc,
3314 			"Byte Errors");
3315 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3316 			CTLFLAG_RD, &stats->mspdc,
3317 			"MAC Short Packets Discarded");
3318 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3319 			CTLFLAG_RD, &stats->mlfc,
3320 			"MAC Local Faults");
3321 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3322 			CTLFLAG_RD, &stats->mrfc,
3323 			"MAC Remote Faults");
3324 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3325 			CTLFLAG_RD, &stats->rlec,
3326 			"Receive Length Errors");
3327 
3328 	/* Flow Control stats */
3329 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3330 			CTLFLAG_RD, &stats->lxontxc,
3331 			"Link XON Transmitted");
3332 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3333 			CTLFLAG_RD, &stats->lxonrxc,
3334 			"Link XON Received");
3335 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3336 			CTLFLAG_RD, &stats->lxofftxc,
3337 			"Link XOFF Transmitted");
3338 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3339 			CTLFLAG_RD, &stats->lxoffrxc,
3340 			"Link XOFF Received");
3341 
3342 	/* Packet Reception Stats */
3343 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3344 			CTLFLAG_RD, &stats->tor,
3345 			"Total Octets Received");
3346 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3347 			CTLFLAG_RD, &stats->gorc,
3348 			"Good Octets Received");
3349 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3350 			CTLFLAG_RD, &stats->tpr,
3351 			"Total Packets Received");
3352 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3353 			CTLFLAG_RD, &stats->gprc,
3354 			"Good Packets Received");
3355 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3356 			CTLFLAG_RD, &stats->mprc,
3357 			"Multicast Packets Received");
3358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3359 			CTLFLAG_RD, &stats->bprc,
3360 			"Broadcast Packets Received");
3361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3362 			CTLFLAG_RD, &stats->prc64,
3363 			"64 byte frames received ");
3364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3365 			CTLFLAG_RD, &stats->prc127,
3366 			"65-127 byte frames received");
3367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3368 			CTLFLAG_RD, &stats->prc255,
3369 			"128-255 byte frames received");
3370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3371 			CTLFLAG_RD, &stats->prc511,
3372 			"256-511 byte frames received");
3373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3374 			CTLFLAG_RD, &stats->prc1023,
3375 			"512-1023 byte frames received");
3376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3377 			CTLFLAG_RD, &stats->prc1522,
3378 			"1024-1522 byte frames received");
3379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3380 			CTLFLAG_RD, &stats->ruc,
3381 			"Receive Undersized");
3382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3383 			CTLFLAG_RD, &stats->rfc,
3384 			"Fragmented Packets Received ");
3385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3386 			CTLFLAG_RD, &stats->roc,
3387 			"Oversized Packets Received");
3388 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3389 			CTLFLAG_RD, &stats->rjc,
3390 			"Received Jabber");
3391 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3392 			CTLFLAG_RD, &stats->mngprc,
3393 			"Management Packets Received");
3394 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3395 			CTLFLAG_RD, &stats->mngpdc,
3396 			"Management Packets Dropped");
3397 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3398 			CTLFLAG_RD, &stats->xec,
3399 			"Checksum Errors");
3400 
3401 	/* Packet Transmission Stats */
3402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3403 			CTLFLAG_RD, &stats->gotc,
3404 			"Good Octets Transmitted");
3405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3406 			CTLFLAG_RD, &stats->tpt,
3407 			"Total Packets Transmitted");
3408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3409 			CTLFLAG_RD, &stats->gptc,
3410 			"Good Packets Transmitted");
3411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3412 			CTLFLAG_RD, &stats->bptc,
3413 			"Broadcast Packets Transmitted");
3414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3415 			CTLFLAG_RD, &stats->mptc,
3416 			"Multicast Packets Transmitted");
3417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3418 			CTLFLAG_RD, &stats->mngptc,
3419 			"Management Packets Transmitted");
3420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3421 			CTLFLAG_RD, &stats->ptc64,
3422 			"64 byte frames transmitted ");
3423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3424 			CTLFLAG_RD, &stats->ptc127,
3425 			"65-127 byte frames transmitted");
3426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3427 			CTLFLAG_RD, &stats->ptc255,
3428 			"128-255 byte frames transmitted");
3429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3430 			CTLFLAG_RD, &stats->ptc511,
3431 			"256-511 byte frames transmitted");
3432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3433 			CTLFLAG_RD, &stats->ptc1023,
3434 			"512-1023 byte frames transmitted");
3435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3436 			CTLFLAG_RD, &stats->ptc1522,
3437 			"1024-1522 byte frames transmitted");
3438 }
3439 #endif
3440 
3441 /*
3442  * Enable the hardware to drop packets when the buffer is full.
3443  * This is useful when multiple RX rings are used, so that no
3444  * single RX ring being full stalls the entire RX engine.  We
3445  * only enable this when multiple RX rings are used and when
3446  * flow control is disabled.
3447  */
3448 static void
3449 ix_enable_rx_drop(struct ix_softc *sc)
3450 {
3451 	struct ixgbe_hw *hw = &sc->hw;
3452 	int i;
3453 
3454 	if (bootverbose) {
3455 		if_printf(&sc->arpcom.ac_if,
3456 		    "flow control %d, enable RX drop\n", sc->fc);
3457 	}
3458 
3459 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3460 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3461 
3462 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3463 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3464 	}
3465 }
3466 
3467 static void
3468 ix_disable_rx_drop(struct ix_softc *sc)
3469 {
3470 	struct ixgbe_hw *hw = &sc->hw;
3471 	int i;
3472 
3473 	if (bootverbose) {
3474 		if_printf(&sc->arpcom.ac_if,
3475 		    "flow control %d, disable RX drop\n", sc->fc);
3476 	}
3477 
3478 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3479 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3480 
3481 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3482 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3483 	}
3484 }
3485 
3486 static int
3487 ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS)
3488 {
3489 	struct ix_softc *sc = (struct ix_softc *)arg1;
3490 	struct ifnet *ifp = &sc->arpcom.ac_if;
3491 	int error, fc;
3492 
3493 	fc = sc->fc;
3494 	error = sysctl_handle_int(oidp, &fc, 0, req);
3495 	if (error || req->newptr == NULL)
3496 		return error;
3497 
3498 	switch (fc) {
3499 	case ixgbe_fc_rx_pause:
3500 	case ixgbe_fc_tx_pause:
3501 	case ixgbe_fc_full:
3502 	case ixgbe_fc_none:
3503 		break;
3504 	default:
3505 		return EINVAL;
3506 	}
3507 
3508 	ifnet_serialize_all(ifp);
3509 
3510 	/* Don't bother if it's not changed */
3511 	if (sc->fc == fc)
3512 		goto done;
3513 	sc->fc = fc;
3514 
3515 	/* Don't do anything if the interface is not up yet */
3516 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3517 		goto done;
3518 
3519 	if (sc->rx_ring_inuse > 1) {
3520 		switch (sc->fc) {
3521 		case ixgbe_fc_rx_pause:
3522 		case ixgbe_fc_tx_pause:
3523 		case ixgbe_fc_full:
3524 			ix_disable_rx_drop(sc);
3525 			break;
3526 
3527 		case ixgbe_fc_none:
3528 			ix_enable_rx_drop(sc);
3529 			break;
3530 
3531 		default:
3532 			panic("leading fc check mismatch");
3533 		}
3534 	}
3535 
3536 	sc->hw.fc.requested_mode = sc->fc;
3537 	/* Don't autoneg if forcing a value */
3538 	sc->hw.fc.disable_fc_autoneg = TRUE;
3539 	ixgbe_fc_enable(&sc->hw);
3540 
3541 done:
3542 	ifnet_deserialize_all(ifp);
3543 	return error;
3544 }
3545 
3546 #ifdef foo
3547 /* XXX not working properly w/ 82599 connected w/ DAC */
3548 /* XXX only work after the interface is up */
3549 static int
3550 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3551 {
3552 	struct ix_softc *sc = (struct ix_softc *)arg1;
3553 	struct ifnet *ifp = &sc->arpcom.ac_if;
3554 	struct ixgbe_hw *hw = &sc->hw;
3555 	ixgbe_link_speed speed;
3556 	int error, advspeed;
3557 
3558 	advspeed = sc->advspeed;
3559 	error = sysctl_handle_int(oidp, &advspeed, 0, req);
3560 	if (error || req->newptr == NULL)
3561 		return error;
3562 
3563 	if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3564 	    hw->phy.multispeed_fiber))
3565 		return EOPNOTSUPP;
3566 	if (hw->mac.ops.setup_link == NULL)
3567 		return EOPNOTSUPP;
3568 
3569 	switch (advspeed) {
3570 	case 0:	/* auto */
3571 		speed = IXGBE_LINK_SPEED_UNKNOWN;
3572 		break;
3573 
3574 	case 1:	/* 1Gb */
3575 		speed = IXGBE_LINK_SPEED_1GB_FULL;
3576 		break;
3577 
3578 	case 2:	/* 100Mb */
3579 		speed = IXGBE_LINK_SPEED_100_FULL;
3580 		break;
3581 
3582 	case 3:	/* 1Gb/10Gb */
3583 		speed = IXGBE_LINK_SPEED_1GB_FULL |
3584 		    IXGBE_LINK_SPEED_10GB_FULL;
3585 		break;
3586 
3587 	default:
3588 		return EINVAL;
3589 	}
3590 
3591 	ifnet_serialize_all(ifp);
3592 
3593 	if (sc->advspeed == advspeed) /* no change */
3594 		goto done;
3595 
3596 	if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3597 	    hw->mac.type != ixgbe_mac_X540) {
3598 		error = EOPNOTSUPP;
3599 		goto done;
3600 	}
3601 
3602 	sc->advspeed = advspeed;
3603 
3604 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3605 		goto done;
3606 
3607 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3608 		ix_config_link(sc);
3609 	} else {
3610 		hw->mac.autotry_restart = TRUE;
3611 		hw->mac.ops.setup_link(hw, speed, sc->link_up);
3612 	}
3613 
3614 done:
3615 	ifnet_deserialize_all(ifp);
3616 	return error;
3617 }
3618 #endif
3619 
3620 static void
3621 ix_setup_serialize(struct ix_softc *sc)
3622 {
3623 	int i = 0, j;
3624 
3625 	/* Main + RX + TX */
3626 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3627 	sc->serializes =
3628 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3629 	        M_DEVBUF, M_WAITOK | M_ZERO);
3630 
3631 	/*
3632 	 * Setup serializes
3633 	 *
3634 	 * NOTE: Order is critical
3635 	 */
3636 
3637 	KKASSERT(i < sc->nserialize);
3638 	sc->serializes[i++] = &sc->main_serialize;
3639 
3640 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3641 		KKASSERT(i < sc->nserialize);
3642 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3643 	}
3644 
3645 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3646 		KKASSERT(i < sc->nserialize);
3647 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3648 	}
3649 
3650 	KKASSERT(i == sc->nserialize);
3651 }
3652 
3653 static int
3654 ix_alloc_intr(struct ix_softc *sc)
3655 {
3656 	struct ix_intr_data *intr;
3657 	u_int intr_flags;
3658 
3659 	ix_alloc_msix(sc);
3660 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3661 		ix_set_ring_inuse(sc, FALSE);
3662 		return 0;
3663 	}
3664 
3665 	if (sc->intr_data != NULL)
3666 		kfree(sc->intr_data, M_DEVBUF);
3667 
3668 	sc->intr_cnt = 1;
3669 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3670 	    M_WAITOK | M_ZERO);
3671 	intr = &sc->intr_data[0];
3672 
3673 	/*
3674 	 * Allocate MSI/legacy interrupt resource
3675 	 */
3676 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3677 	    &intr->intr_rid, &intr_flags);
3678 
3679 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3680 	    &intr->intr_rid, intr_flags);
3681 	if (intr->intr_res == NULL) {
3682 		device_printf(sc->dev, "Unable to allocate bus resource: "
3683 		    "interrupt\n");
3684 		return ENXIO;
3685 	}
3686 
3687 	intr->intr_serialize = &sc->main_serialize;
3688 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3689 	intr->intr_func = ix_intr;
3690 	intr->intr_funcarg = sc;
3691 	intr->intr_rate = IX_INTR_RATE;
3692 	intr->intr_use = IX_INTR_USE_RXTX;
3693 
3694 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3695 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3696 
3697 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3698 
3699 	ix_set_ring_inuse(sc, FALSE);
3700 
3701 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3702 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3703 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3704 
3705 	return 0;
3706 }
3707 
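/*
 * Release the interrupt resources set up by ix_alloc_intr(), either
 * the single MSI/legacy interrupt or all MSI-X vectors.
 */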
3708 static void
3709 ix_free_intr(struct ix_softc *sc)
3710 {
3711 	if (sc->intr_data == NULL)
3712 		return;
3713 
3714 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3715 		struct ix_intr_data *intr = &sc->intr_data[0];
3716 
3717 		KKASSERT(sc->intr_cnt == 1);
3718 		if (intr->intr_res != NULL) {
3719 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3720 			    intr->intr_rid, intr->intr_res);
3721 		}
3722 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3723 			pci_release_msi(sc->dev);
3724 
3725 		kfree(sc->intr_data, M_DEVBUF);
		sc->intr_data = NULL;
3726 	} else {
3727 		ix_free_msix(sc, TRUE);
3728 	}
3729 }
3730 
3731 static void
3732 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3733 {
3734 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3735 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3736 	if (bootverbose) {
3737 		if_printf(&sc->arpcom.ac_if,
3738 		    "RX rings %d/%d, TX rings %d/%d\n",
3739 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3740 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3741 	}
3742 }
3743 
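/*
 * Number of RX rings actually used: 1 without hardware RSS, all rings
 * when polling, IX_MIN_RXRING_RSS on MSI/legacy interrupts, otherwise
 * the number of RX rings backed by MSI-X vectors.
 */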
3744 static int
3745 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3746 {
3747 	if (!IX_ENABLE_HWRSS(sc))
3748 		return 1;
3749 
3750 	if (polling)
3751 		return sc->rx_ring_cnt;
3752 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3753 		return IX_MIN_RXRING_RSS;
3754 	else
3755 		return sc->rx_ring_msix;
3756 }
3757 
3758 static int
3759 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3760 {
3761 	if (!IX_ENABLE_HWTSS(sc))
3762 		return 1;
3763 
3764 	if (polling)
3765 		return sc->tx_ring_cnt;
3766 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3767 		return 1;
3768 	else
3769 		return sc->tx_ring_msix;
3770 }
3771 
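/*
 * Install the interrupt handlers described in sc->intr_data.  On
 * failure, the handlers installed so far are torn down again.
 */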
3772 static int
3773 ix_setup_intr(struct ix_softc *sc)
3774 {
3775 	int i;
3776 
3777 	for (i = 0; i < sc->intr_cnt; ++i) {
3778 		struct ix_intr_data *intr = &sc->intr_data[i];
3779 		int error;
3780 
3781 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3782 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3783 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3784 		if (error) {
3785 			device_printf(sc->dev, "can't set up %dth intr\n", i);
3786 			ix_teardown_intr(sc, i);
3787 			return error;
3788 		}
3789 	}
3790 	return 0;
3791 }
3792 
3793 static void
3794 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3795 {
3796 	int i;
3797 
3798 	if (sc->intr_data == NULL)
3799 		return;
3800 
3801 	for (i = 0; i < intr_cnt; ++i) {
3802 		struct ix_intr_data *intr = &sc->intr_data[i];
3803 
3804 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3805 	}
3806 }
3807 
3808 static void
3809 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3810 {
3811 	struct ix_softc *sc = ifp->if_softc;
3812 
3813 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3814 }
3815 
3816 static void
3817 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3818 {
3819 	struct ix_softc *sc = ifp->if_softc;
3820 
3821 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3822 }
3823 
3824 static int
3825 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3826 {
3827 	struct ix_softc *sc = ifp->if_softc;
3828 
3829 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3830 }
3831 
3832 #ifdef INVARIANTS
3833 
3834 static void
3835 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3836     boolean_t serialized)
3837 {
3838 	struct ix_softc *sc = ifp->if_softc;
3839 
3840 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3841 	    serialized);
3842 }
3843 
3844 #endif	/* INVARIANTS */
3845 
3846 static void
3847 ix_free_rings(struct ix_softc *sc)
3848 {
3849 	int i;
3850 
3851 	if (sc->tx_rings != NULL) {
3852 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3853 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3854 
3855 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3856 		}
3857 		kfree(sc->tx_rings, M_DEVBUF);
3858 	}
3859 
3860 	if (sc->rx_rings != NULL) {
3861 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3862 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3863 
3864 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3865 		}
3866 		kfree(sc->rx_rings, M_DEVBUF);
3867 	}
3868 
3869 	if (sc->parent_tag != NULL)
3870 		bus_dma_tag_destroy(sc->parent_tag);
3871 }
3872 
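/*
 * TX watchdog.  If the queue is merely paused by flow control the
 * timer is rearmed; otherwise some TX ring state is logged, the
 * interface is reinitialized and the transmit queues are kicked.
 */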
3873 static void
3874 ix_watchdog(struct ifaltq_subque *ifsq)
3875 {
3876 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3877 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3878 	struct ix_softc *sc = ifp->if_softc;
3879 	int i;
3880 
3881 	KKASSERT(txr->tx_ifsq == ifsq);
3882 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3883 
3884 	/*
3885 	 * If the interface has been paused, don't do the watchdog check.
3886 	 */
3887 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3888 		txr->tx_watchdog.wd_timer = 5;
3889 		return;
3890 	}
3891 
3892 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3893 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3894 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3895 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3896 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3897 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3898 
3899 	ix_init(sc);
3900 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3901 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3902 }
3903 
3904 static void
3905 ix_free_tx_ring(struct ix_tx_ring *txr)
3906 {
3907 	int i;
3908 
3909 	for (i = 0; i < txr->tx_ndesc; ++i) {
3910 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3911 
3912 		if (txbuf->m_head != NULL) {
3913 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3914 			m_freem(txbuf->m_head);
3915 			txbuf->m_head = NULL;
3916 		}
3917 	}
3918 }
3919 
3920 static void
3921 ix_free_rx_ring(struct ix_rx_ring *rxr)
3922 {
3923 	int i;
3924 
3925 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3926 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3927 
3928 		if (rxbuf->fmp != NULL) {
3929 			m_freem(rxbuf->fmp);
3930 			rxbuf->fmp = NULL;
3931 			rxbuf->lmp = NULL;
3932 		} else {
3933 			KKASSERT(rxbuf->lmp == NULL);
3934 		}
3935 		if (rxbuf->m_head != NULL) {
3936 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3937 			m_freem(rxbuf->m_head);
3938 			rxbuf->m_head = NULL;
3939 		}
3940 	}
3941 }
3942 
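/*
 * Attach a newly allocated mbuf cluster to RX descriptor 'i'.  The
 * mbuf is DMA-loaded through the spare map, which is then swapped
 * with the descriptor's map, so the descriptor keeps its old buffer
 * if the allocation or the DMA load fails.
 */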
3943 static int
3944 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3945 {
3946 	struct mbuf *m;
3947 	bus_dma_segment_t seg;
3948 	bus_dmamap_t map;
3949 	struct ix_rx_buf *rxbuf;
3950 	int flags, error, nseg;
3951 
3952 	flags = M_NOWAIT;
3953 	if (__predict_false(wait))
3954 		flags = M_WAITOK;
3955 
3956 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3957 	if (m == NULL) {
3958 		if (wait) {
3959 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3960 			    "Unable to allocate RX mbuf\n");
3961 		}
3962 		return ENOBUFS;
3963 	}
3964 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3965 
3966 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3967 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3968 	if (error) {
3969 		m_freem(m);
3970 		if (wait) {
3971 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3972 			    "Unable to load RX mbuf\n");
3973 		}
3974 		return error;
3975 	}
3976 
3977 	rxbuf = &rxr->rx_buf[i];
3978 	if (rxbuf->m_head != NULL)
3979 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3980 
3981 	map = rxbuf->map;
3982 	rxbuf->map = rxr->rx_sparemap;
3983 	rxr->rx_sparemap = map;
3984 
3985 	rxbuf->m_head = m;
3986 	rxbuf->paddr = seg.ds_addr;
3987 
3988 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3989 	return 0;
3990 }
3991 
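/*
 * Create the per-device sysctl nodes: ring and descriptor counts,
 * write-register and TX interrupt coalescing knobs, interrupt rates,
 * optional polling CPU offsets, RSS debug counters and the flow
 * control setting.
 */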
3992 static void
3993 ix_add_sysctl(struct ix_softc *sc)
3994 {
3995 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3996 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
3997 #ifdef IX_RSS_DEBUG
3998 	char node[32];
3999 	int i;
4000 #endif
4001 
4002 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4003 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4004 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4005 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4006 	    "# of RX rings used");
4007 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4008 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4009 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4010 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4011 	    "# of TX rings used");
4012 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4013 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4014 	    sc, 0, ix_sysctl_rxd, "I",
4015 	    "# of RX descs");
4016 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4017 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4018 	    sc, 0, ix_sysctl_txd, "I",
4019 	    "# of TX descs");
4020 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4021 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4022 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4023 	    "# of segments sent before write to hardware register");
4024 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4025 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4026 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4027 	    "# of received segments sent before write to hardware register");
4028 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4029 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4030 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4031 	    "# of segments per TX interrupt");
4032 
4033 #ifdef IFPOLL_ENABLE
4034 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4035 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT | CTLFLAG_RW,
4036 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4037 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4038 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT | CTLFLAG_RW,
4039 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4040 #endif
4041 
4042 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4043 do { \
4044 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4045 	    ix_sysctl_##name, #use " interrupt rate"); \
4046 } while (0)
4047 
4048 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4049 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4050 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4051 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4052 
4053 #undef IX_ADD_INTR_RATE_SYSCTL
4054 
4055 #ifdef IX_RSS_DEBUG
4056 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4057 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4058 	    "RSS debug level");
4059 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4060 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4061 		SYSCTL_ADD_ULONG(ctx,
4062 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4063 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4064 	}
4065 #endif
4066 
4067 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4068 	    OID_AUTO, "flowctrl", CTLTYPE_INT | CTLFLAG_RW,
4069 	    sc, 0, ix_sysctl_flowctrl, "I",
4070 	    "flow control, 0 - off, 1 - rx pause, 2 - tx pause, 3 - full");
4071 
4072 #ifdef foo
4073 	/*
4074 	 * Allow a kind of speed control by forcing the autoneg
4075 	 * advertised speed list to only a certain value, this
4076 	 * supports 1G on 82599 devices, and 100Mb on X540.
4077 	 */
4078 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4079 	    OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
4080 	    sc, 0, ix_sysctl_advspeed, "I",
4081 	    "advertised link speed, "
4082 	    "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
4083 #endif
4084 
4085 #if 0
4086 	ix_add_hw_stats(sc);
4087 #endif
4088 
4089 }
4090 
4091 static int
4092 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4093 {
4094 	struct ix_softc *sc = (void *)arg1;
4095 	struct ifnet *ifp = &sc->arpcom.ac_if;
4096 	int error, nsegs, i;
4097 
4098 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4099 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4100 	if (error || req->newptr == NULL)
4101 		return error;
4102 	if (nsegs < 0)
4103 		return EINVAL;
4104 
4105 	ifnet_serialize_all(ifp);
4106 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4107 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4108 	ifnet_deserialize_all(ifp);
4109 
4110 	return 0;
4111 }
4112 
4113 static int
4114 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4115 {
4116 	struct ix_softc *sc = (void *)arg1;
4117 	struct ifnet *ifp = &sc->arpcom.ac_if;
4118 	int error, nsegs, i;
4119 
4120 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4121 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4122 	if (error || req->newptr == NULL)
4123 		return error;
4124 	if (nsegs < 0)
4125 		return EINVAL;
4126 
4127 	ifnet_serialize_all(ifp);
4128 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4129 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4130 	ifnet_deserialize_all(ifp);
4131 
4132 	return 0;
4133 }
4134 
4135 static int
4136 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4137 {
4138 	struct ix_softc *sc = (void *)arg1;
4139 	int txd;
4140 
4141 	txd = sc->tx_rings[0].tx_ndesc;
4142 	return sysctl_handle_int(oidp, &txd, 0, req);
4143 }
4144 
4145 static int
4146 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4147 {
4148 	struct ix_softc *sc = (void *)arg1;
4149 	int rxd;
4150 
4151 	rxd = sc->rx_rings[0].rx_ndesc;
4152 	return sysctl_handle_int(oidp, &rxd, 0, req);
4153 }
4154 
4155 static int
4156 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4157 {
4158 	struct ix_softc *sc = (void *)arg1;
4159 	struct ifnet *ifp = &sc->arpcom.ac_if;
4160 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4161 	int error, nsegs;
4162 
4163 	nsegs = txr->tx_intr_nsegs;
4164 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4165 	if (error || req->newptr == NULL)
4166 		return error;
4167 	if (nsegs < 0)
4168 		return EINVAL;
4169 
4170 	ifnet_serialize_all(ifp);
4171 
4172 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4173 		error = EINVAL;
4174 	} else {
4175 		int i;
4176 
4177 		error = 0;
4178 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4179 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4180 	}
4181 
4182 	ifnet_deserialize_all(ifp);
4183 
4184 	return error;
4185 }
4186 
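/*
 * Program the EITR register of vector 'idx' with the requested
 * interrupt rate (interrupts/second), converted to the interval
 * format of the hardware and clamped to the limits of the MAC type
 * (82598 vs. later chips).
 */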
4187 static void
4188 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4189 {
4190 	uint32_t eitr, eitr_intvl;
4191 
4192 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4193 	eitr_intvl = 1000000000 / 256 / rate;
4194 
4195 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4196 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4197 		if (eitr_intvl == 0)
4198 			eitr_intvl = 1;
4199 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4200 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4201 	} else {
4202 		eitr &= ~IX_EITR_INTVL_MASK;
4203 
4204 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4205 		if (eitr_intvl == 0)
4206 			eitr_intvl = IX_EITR_INTVL_MIN;
4207 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4208 			eitr_intvl = IX_EITR_INTVL_MAX;
4209 	}
4210 	eitr |= eitr_intvl;
4211 
4212 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4213 }
4214 
4215 static int
4216 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4217 {
4218 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4219 }
4220 
4221 static int
4222 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4223 {
4224 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4225 }
4226 
4227 static int
4228 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4229 {
4230 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4231 }
4232 
4233 static int
4234 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4235 {
4236 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4237 }
4238 
4239 static int
4240 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4241 {
4242 	struct ix_softc *sc = (void *)arg1;
4243 	struct ifnet *ifp = &sc->arpcom.ac_if;
4244 	int error, rate, i;
4245 
4246 	rate = 0;
4247 	for (i = 0; i < sc->intr_cnt; ++i) {
4248 		if (sc->intr_data[i].intr_use == use) {
4249 			rate = sc->intr_data[i].intr_rate;
4250 			break;
4251 		}
4252 	}
4253 
4254 	error = sysctl_handle_int(oidp, &rate, 0, req);
4255 	if (error || req->newptr == NULL)
4256 		return error;
4257 	if (rate <= 0)
4258 		return EINVAL;
4259 
4260 	ifnet_serialize_all(ifp);
4261 
4262 	for (i = 0; i < sc->intr_cnt; ++i) {
4263 		if (sc->intr_data[i].intr_use == use) {
4264 			sc->intr_data[i].intr_rate = rate;
4265 			if (ifp->if_flags & IFF_RUNNING)
4266 				ix_set_eitr(sc, i, rate);
4267 		}
4268 	}
4269 
4270 	ifnet_deserialize_all(ifp);
4271 
4272 	return error;
4273 }
4274 
4275 static void
4276 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4277     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4278 {
4279 	int i;
4280 
4281 	for (i = 0; i < sc->intr_cnt; ++i) {
4282 		if (sc->intr_data[i].intr_use == use) {
4283 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4284 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4285 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4286 			    sc, 0, handler, "I", desc);
4287 			break;
4288 		}
4289 	}
4290 }
4291 
4292 static void
4293 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4294 {
4295 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4296 		sc->timer_cpuid = 0; /* XXX fixed */
4297 	else
4298 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4299 }
4300 
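/*
 * Try to switch the device to MSI-X: size the usable vector count
 * against the hardware limits, choose between aggregated and
 * independent RX/TX vectors, spread the vectors across CPUs and
 * reserve one extra vector for status/link events.  On any failure
 * the device stays on MSI or legacy interrupts.
 */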
4301 static void
4302 ix_alloc_msix(struct ix_softc *sc)
4303 {
4304 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4305 	struct ix_intr_data *intr;
4306 	int i, x, error;
4307 	int offset, offset_def, agg_rxtx, ring_max;
4308 	boolean_t aggregate, setup = FALSE;
4309 
4310 	msix_enable = ix_msix_enable;
4311 	/*
4312 	 * Don't enable MSI-X on 82598 by default, see:
4313 	 * 82598 specification update errata #38
4314 	 */
4315 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4316 		msix_enable = 0;
4317 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4318 	if (!msix_enable)
4319 		return;
4320 
4321 	msix_cnt = pci_msix_count(sc->dev);
4322 #ifdef IX_MSIX_DEBUG
4323 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4324 #endif
4325 	if (msix_cnt <= 1) {
4326 		/* MSI-X with a single vector does not make sense */
4327 		return;
4328 	}
4329 
4330 	i = 0;
4331 	while ((1 << (i + 1)) <= msix_cnt)
4332 		++i;
4333 	msix_cnt2 = 1 << i;
4334 
4335 	if (bootverbose) {
4336 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4337 		    msix_cnt2, msix_cnt);
4338 	}
4339 
4340 	KKASSERT(msix_cnt >= msix_cnt2);
4341 	if (msix_cnt == msix_cnt2) {
4342 		/* We need at least one MSI-X for link status */
4343 		msix_cnt2 >>= 1;
4344 		if (msix_cnt2 <= 1) {
4345 			/* One MSI-X for RX/TX does not make sense */
4346 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4347 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4348 			return;
4349 		}
4350 		KKASSERT(msix_cnt > msix_cnt2);
4351 
4352 		if (bootverbose) {
4353 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4354 			    msix_cnt2, msix_cnt);
4355 		}
4356 	}
4357 
4358 	/*
4359 	 * Make sure that we don't exceed the limitations of the
4360 	 * interrupt related registers (EIMS, etc).
4361 	 *
4362 	 * NOTE: msix_cnt > msix_cnt2 when we reach here.
4363 	 */
4364 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4365 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4366 			msix_cnt2 = IX_MAX_MSIX_82598;
4367 	} else {
4368 		if (msix_cnt2 > IX_MAX_MSIX)
4369 			msix_cnt2 = IX_MAX_MSIX;
4370 	}
4371 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4372 
4373 	if (bootverbose) {
4374 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4375 		    msix_cnt2, msix_cnt);
4376 	}
4377 
4378 	sc->rx_ring_msix = sc->rx_ring_cnt;
4379 	if (sc->rx_ring_msix > msix_cnt2)
4380 		sc->rx_ring_msix = msix_cnt2;
4381 
4382 	sc->tx_ring_msix = sc->tx_ring_cnt;
4383 	if (sc->tx_ring_msix > msix_cnt2)
4384 		sc->tx_ring_msix = msix_cnt2;
4385 
4386 	ring_max = sc->rx_ring_msix;
4387 	if (ring_max < sc->tx_ring_msix)
4388 		ring_max = sc->tx_ring_msix;
4389 
4390 	/* Allow user to force independent RX/TX MSI-X handling */
4391 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4392 	    ix_msix_agg_rxtx);
4393 
4394 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4395 		/*
4396 		 * Independent TX/RX MSI-X
4397 		 */
4398 		aggregate = FALSE;
4399 		if (bootverbose)
4400 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4401 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4402 	} else {
4403 		/*
4404 		 * Aggregate TX/RX MSI-X
4405 		 */
4406 		aggregate = TRUE;
4407 		if (bootverbose)
4408 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4409 		alloc_cnt = msix_cnt2;
4410 		if (alloc_cnt > ring_max)
4411 			alloc_cnt = ring_max;
4412 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4413 		    alloc_cnt >= sc->tx_ring_msix);
4414 	}
4415 	++alloc_cnt;	/* For status */
4416 
4417 	if (bootverbose) {
4418 		device_printf(sc->dev, "MSI-X alloc %d, "
4419 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4420 		    sc->rx_ring_msix, sc->tx_ring_msix);
4421 	}
4422 
4423 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4424 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4425 	    &sc->msix_mem_rid, RF_ACTIVE);
4426 	if (sc->msix_mem_res == NULL) {
4427 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4428 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4429 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4430 		if (sc->msix_mem_res == NULL) {
4431 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4432 			return;
4433 		}
4434 	}
4435 
4436 	sc->intr_cnt = alloc_cnt;
4437 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4438 	    M_DEVBUF, M_WAITOK | M_ZERO);
4439 	for (x = 0; x < sc->intr_cnt; ++x) {
4440 		intr = &sc->intr_data[x];
4441 		intr->intr_rid = -1;
4442 		intr->intr_rate = IX_INTR_RATE;
4443 	}
4444 
4445 	x = 0;
4446 	if (!aggregate) {
4447 		/*
4448 		 * RX rings
4449 		 */
4450 		if (sc->rx_ring_msix == ncpus2) {
4451 			offset = 0;
4452 		} else {
4453 			offset_def = (sc->rx_ring_msix *
4454 			    device_get_unit(sc->dev)) % ncpus2;
4455 
4456 			offset = device_getenv_int(sc->dev,
4457 			    "msix.rxoff", offset_def);
4458 			if (offset >= ncpus2 ||
4459 			    offset % sc->rx_ring_msix != 0) {
4460 				device_printf(sc->dev,
4461 				    "invalid msix.rxoff %d, use %d\n",
4462 				    offset, offset_def);
4463 				offset = offset_def;
4464 			}
4465 		}
4466 		ix_conf_rx_msix(sc, 0, &x, offset);
4467 
4468 		/*
4469 		 * TX rings
4470 		 */
4471 		if (sc->tx_ring_msix == ncpus2) {
4472 			offset = 0;
4473 		} else {
4474 			offset_def = (sc->tx_ring_msix *
4475 			    device_get_unit(sc->dev)) % ncpus2;
4476 
4477 			offset = device_getenv_int(sc->dev,
4478 			    "msix.txoff", offset_def);
4479 			if (offset >= ncpus2 ||
4480 			    offset % sc->tx_ring_msix != 0) {
4481 				device_printf(sc->dev,
4482 				    "invalid msix.txoff %d, use %d\n",
4483 				    offset, offset_def);
4484 				offset = offset_def;
4485 			}
4486 		}
4487 		ix_conf_tx_msix(sc, 0, &x, offset);
4488 	} else {
4489 		int ring_agg;
4490 
4491 		ring_agg = sc->rx_ring_msix;
4492 		if (ring_agg > sc->tx_ring_msix)
4493 			ring_agg = sc->tx_ring_msix;
4494 
4495 		if (ring_max == ncpus2) {
4496 			offset = 0;
4497 		} else {
4498 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4499 			    ncpus2;
4500 
4501 			offset = device_getenv_int(sc->dev, "msix.off",
4502 			    offset_def);
4503 			if (offset >= ncpus2 || offset % ring_max != 0) {
4504 				device_printf(sc->dev,
4505 				    "invalid msix.off %d, use %d\n",
4506 				    offset, offset_def);
4507 				offset = offset_def;
4508 			}
4509 		}
4510 
4511 		for (i = 0; i < ring_agg; ++i) {
4512 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4513 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4514 
4515 			KKASSERT(x < sc->intr_cnt);
4516 			rxr->rx_intr_vec = x;
4517 			ix_setup_msix_eims(sc, x,
4518 			    &rxr->rx_eims, &rxr->rx_eims_val);
4519 			rxr->rx_txr = txr;
4520 			/* NOTE: Leave TX ring's intr_vec negative */
4521 
4522 			intr = &sc->intr_data[x++];
4523 
4524 			intr->intr_serialize = &rxr->rx_serialize;
4525 			intr->intr_func = ix_msix_rxtx;
4526 			intr->intr_funcarg = rxr;
4527 			intr->intr_use = IX_INTR_USE_RXTX;
4528 
4529 			intr->intr_cpuid = i + offset;
4530 			KKASSERT(intr->intr_cpuid < ncpus2);
4531 			txr->tx_intr_cpuid = intr->intr_cpuid;
4532 
4533 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4534 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4535 			intr->intr_desc = intr->intr_desc0;
4536 		}
4537 
4538 		if (ring_agg != ring_max) {
4539 			if (ring_max == sc->tx_ring_msix)
4540 				ix_conf_tx_msix(sc, i, &x, offset);
4541 			else
4542 				ix_conf_rx_msix(sc, i, &x, offset);
4543 		}
4544 	}
4545 
4546 	/*
4547 	 * Status MSI-X
4548 	 */
4549 	KKASSERT(x < sc->intr_cnt);
4550 	sc->sts_msix_vec = x;
4551 
4552 	intr = &sc->intr_data[x++];
4553 
4554 	intr->intr_serialize = &sc->main_serialize;
4555 	intr->intr_func = ix_msix_status;
4556 	intr->intr_funcarg = sc;
4557 	intr->intr_cpuid = 0;
4558 	intr->intr_use = IX_INTR_USE_STATUS;
4559 
4560 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4561 	    device_get_nameunit(sc->dev));
4562 	intr->intr_desc = intr->intr_desc0;
4563 
4564 	KKASSERT(x == sc->intr_cnt);
4565 
4566 	error = pci_setup_msix(sc->dev);
4567 	if (error) {
4568 		device_printf(sc->dev, "Setup MSI-X failed\n");
4569 		goto back;
4570 	}
4571 	setup = TRUE;
4572 
4573 	for (i = 0; i < sc->intr_cnt; ++i) {
4574 		intr = &sc->intr_data[i];
4575 
4576 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4577 		    intr->intr_cpuid);
4578 		if (error) {
4579 			device_printf(sc->dev,
4580 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4581 			    intr->intr_cpuid);
4582 			goto back;
4583 		}
4584 
4585 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4586 		    &intr->intr_rid, RF_ACTIVE);
4587 		if (intr->intr_res == NULL) {
4588 			device_printf(sc->dev,
4589 			    "Unable to allocate MSI-X %d resource\n", i);
4590 			error = ENOMEM;
4591 			goto back;
4592 		}
4593 	}
4594 
4595 	pci_enable_msix(sc->dev);
4596 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4597 back:
4598 	if (error)
4599 		ix_free_msix(sc, setup);
4600 }
4601 
4602 static void
4603 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4604 {
4605 	int i;
4606 
4607 	KKASSERT(sc->intr_cnt > 1);
4608 
4609 	for (i = 0; i < sc->intr_cnt; ++i) {
4610 		struct ix_intr_data *intr = &sc->intr_data[i];
4611 
4612 		if (intr->intr_res != NULL) {
4613 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4614 			    intr->intr_rid, intr->intr_res);
4615 		}
4616 		if (intr->intr_rid >= 0)
4617 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4618 	}
4619 	if (setup)
4620 		pci_teardown_msix(sc->dev);
4621 
4622 	sc->intr_cnt = 0;
4623 	kfree(sc->intr_data, M_DEVBUF);
4624 	sc->intr_data = NULL;
4625 }
4626 
4627 static void
4628 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4629 {
4630 	int x = *x0;
4631 
4632 	for (; i < sc->rx_ring_msix; ++i) {
4633 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4634 		struct ix_intr_data *intr;
4635 
4636 		KKASSERT(x < sc->intr_cnt);
4637 		rxr->rx_intr_vec = x;
4638 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4639 
4640 		intr = &sc->intr_data[x++];
4641 
4642 		intr->intr_serialize = &rxr->rx_serialize;
4643 		intr->intr_func = ix_msix_rx;
4644 		intr->intr_funcarg = rxr;
4645 		intr->intr_rate = IX_MSIX_RX_RATE;
4646 		intr->intr_use = IX_INTR_USE_RX;
4647 
4648 		intr->intr_cpuid = i + offset;
4649 		KKASSERT(intr->intr_cpuid < ncpus2);
4650 
4651 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4652 		    device_get_nameunit(sc->dev), i);
4653 		intr->intr_desc = intr->intr_desc0;
4654 	}
4655 	*x0 = x;
4656 }
4657 
4658 static void
4659 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4660 {
4661 	int x = *x0;
4662 
4663 	for (; i < sc->tx_ring_msix; ++i) {
4664 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4665 		struct ix_intr_data *intr;
4666 
4667 		KKASSERT(x < sc->intr_cnt);
4668 		txr->tx_intr_vec = x;
4669 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4670 
4671 		intr = &sc->intr_data[x++];
4672 
4673 		intr->intr_serialize = &txr->tx_serialize;
4674 		intr->intr_func = ix_msix_tx;
4675 		intr->intr_funcarg = txr;
4676 		intr->intr_rate = IX_MSIX_TX_RATE;
4677 		intr->intr_use = IX_INTR_USE_TX;
4678 
4679 		intr->intr_cpuid = i + offset;
4680 		KKASSERT(intr->intr_cpuid < ncpus2);
4681 		txr->tx_intr_cpuid = intr->intr_cpuid;
4682 
4683 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4684 		    device_get_nameunit(sc->dev), i);
4685 		intr->intr_desc = intr->intr_desc0;
4686 	}
4687 	*x0 = x;
4688 }
4689 
4690 static void
4691 ix_msix_rx(void *xrxr)
4692 {
4693 	struct ix_rx_ring *rxr = xrxr;
4694 
4695 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4696 
4697 	ix_rxeof(rxr, -1);
4698 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4699 }
4700 
4701 static void
4702 ix_msix_tx(void *xtxr)
4703 {
4704 	struct ix_tx_ring *txr = xtxr;
4705 
4706 	ASSERT_SERIALIZED(&txr->tx_serialize);
4707 
4708 	ix_txeof(txr, *(txr->tx_hdr));
4709 	if (!ifsq_is_empty(txr->tx_ifsq))
4710 		ifsq_devstart(txr->tx_ifsq);
4711 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4712 }
4713 
4714 static void
4715 ix_msix_rxtx(void *xrxr)
4716 {
4717 	struct ix_rx_ring *rxr = xrxr;
4718 	struct ix_tx_ring *txr;
4719 	int hdr;
4720 
4721 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4722 
4723 	ix_rxeof(rxr, -1);
4724 
4725 	/*
4726 	 * NOTE:
4727 	 * Since tx_next_clean is only changed by ix_txeof(),
4728 	 * which is called only in the interrupt handler, the
4729 	 * check w/o holding tx serializer is MPSAFE.
4730 	 */
4731 	txr = rxr->rx_txr;
4732 	hdr = *(txr->tx_hdr);
4733 	if (hdr != txr->tx_next_clean) {
4734 		lwkt_serialize_enter(&txr->tx_serialize);
4735 		ix_txeof(txr, hdr);
4736 		if (!ifsq_is_empty(txr->tx_ifsq))
4737 			ifsq_devstart(txr->tx_ifsq);
4738 		lwkt_serialize_exit(&txr->tx_serialize);
4739 	}
4740 
4741 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4742 }
4743 
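/*
 * Handle "other cause" interrupt bits: link state changes, SFP module
 * and multi-speed fiber events, ECC errors, fan failure (82598AT) and
 * over-temperature (X540).
 */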
4744 static void
4745 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4746 {
4747 	struct ixgbe_hw *hw = &sc->hw;
4748 
4749 	/* Link status change */
4750 	if (eicr & IXGBE_EICR_LSC)
4751 		ix_handle_link(sc);
4752 
4753 	if (hw->mac.type != ixgbe_mac_82598EB) {
4754 		if (eicr & IXGBE_EICR_ECC)
4755 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4756 		else if (eicr & IXGBE_EICR_GPI_SDP1)
4757 			ix_handle_msf(sc);
4758 		else if (eicr & IXGBE_EICR_GPI_SDP2)
4759 			ix_handle_mod(sc);
4760 	}
4761 
4762 	/* Check for fan failure */
4763 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4764 	    (eicr & IXGBE_EICR_GPI_SDP1))
4765 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4766 
4767 	/* Check for over temp condition */
4768 	if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) {
4769 		if_printf(&sc->arpcom.ac_if, "OVER TEMP!!  "
4770 		    "PHY IS SHUT DOWN!!  Reboot\n");
4771 	}
4772 }
4773 
4774 static void
4775 ix_msix_status(void *xsc)
4776 {
4777 	struct ix_softc *sc = xsc;
4778 	uint32_t eicr;
4779 
4780 	ASSERT_SERIALIZED(&sc->main_serialize);
4781 
4782 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4783 	ix_intr_status(sc, eicr);
4784 
4785 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4786 }
4787 
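/*
 * Map MSI-X vector 'x' to the EIMS register and bit used to re-enable
 * it: vectors 0-31 use EIMS (82598) or EIMS_EX(0), vectors 32 and up
 * use EIMS_EX(1), which the 82598 lacks.
 */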
4788 static void
4789 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4790     uint32_t *eims, uint32_t *eims_val)
4791 {
4792 	if (x < 32) {
4793 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4794 			KASSERT(x < IX_MAX_MSIX_82598,
4795 			    ("%s: invalid vector %d for 82598",
4796 			     device_get_nameunit(sc->dev), x));
4797 			*eims = IXGBE_EIMS;
4798 		} else {
4799 			*eims = IXGBE_EIMS_EX(0);
4800 		}
4801 		*eims_val = 1 << x;
4802 	} else {
4803 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4804 		    device_get_nameunit(sc->dev), x));
4805 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4806 		    ("%s: invalid vector %d for 82598",
4807 		     device_get_nameunit(sc->dev), x));
4808 		*eims = IXGBE_EIMS_EX(1);
4809 		*eims_val = 1 << (x - 32);
4810 	}
4811 }
4812 
4813 #ifdef IFPOLL_ENABLE
4814 
4815 static void
4816 ix_npoll_status(struct ifnet *ifp)
4817 {
4818 	struct ix_softc *sc = ifp->if_softc;
4819 	uint32_t eicr;
4820 
4821 	ASSERT_SERIALIZED(&sc->main_serialize);
4822 
4823 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4824 	ix_intr_status(sc, eicr);
4825 }
4826 
4827 static void
4828 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4829 {
4830 	struct ix_tx_ring *txr = arg;
4831 
4832 	ASSERT_SERIALIZED(&txr->tx_serialize);
4833 
4834 	ix_txeof(txr, *(txr->tx_hdr));
4835 	if (!ifsq_is_empty(txr->tx_ifsq))
4836 		ifsq_devstart(txr->tx_ifsq);
4837 }
4838 
4839 static void
4840 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4841 {
4842 	struct ix_rx_ring *rxr = arg;
4843 
4844 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4845 
4846 	ix_rxeof(rxr, cycle);
4847 }
4848 
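/*
 * ifpoll registration.  With a non-NULL 'info' the status, TX and RX
 * polling handlers are registered at the configured CPU offsets; with
 * a NULL 'info' the TX queues are bound back to their interrupt CPUs.
 * If the interface is running it either switches between interrupts
 * and polling directly or is reinitialized when the number of rings
 * in use changes.
 */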
4849 static void
4850 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4851 {
4852 	struct ix_softc *sc = ifp->if_softc;
4853 	int i, txr_cnt, rxr_cnt;
4854 
4855 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4856 
4857 	if (info) {
4858 		int off;
4859 
4860 		info->ifpi_status.status_func = ix_npoll_status;
4861 		info->ifpi_status.serializer = &sc->main_serialize;
4862 
4863 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4864 		off = sc->tx_npoll_off;
4865 		for (i = 0; i < txr_cnt; ++i) {
4866 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4867 			int idx = i + off;
4868 
4869 			KKASSERT(idx < ncpus2);
4870 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4871 			info->ifpi_tx[idx].arg = txr;
4872 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4873 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4874 		}
4875 
4876 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4877 		off = sc->rx_npoll_off;
4878 		for (i = 0; i < rxr_cnt; ++i) {
4879 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4880 			int idx = i + off;
4881 
4882 			KKASSERT(idx < ncpus2);
4883 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4884 			info->ifpi_rx[idx].arg = rxr;
4885 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4886 		}
4887 
4888 		if (ifp->if_flags & IFF_RUNNING) {
4889 			if (rxr_cnt == sc->rx_ring_inuse &&
4890 			    txr_cnt == sc->tx_ring_inuse) {
4891 				ix_set_timer_cpuid(sc, TRUE);
4892 				ix_disable_intr(sc);
4893 			} else {
4894 				ix_init(sc);
4895 			}
4896 		}
4897 	} else {
4898 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4899 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4900 
4901 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4902 		}
4903 
4904 		if (ifp->if_flags & IFF_RUNNING) {
4905 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4906 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4907 
4908 			if (rxr_cnt == sc->rx_ring_inuse &&
4909 			    txr_cnt == sc->tx_ring_inuse) {
4910 				ix_set_timer_cpuid(sc, FALSE);
4911 				ix_enable_intr(sc);
4912 			} else {
4913 				ix_init(sc);
4914 			}
4915 		}
4916 	}
4917 }
4918 
4919 static int
4920 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4921 {
4922 	struct ix_softc *sc = (void *)arg1;
4923 	struct ifnet *ifp = &sc->arpcom.ac_if;
4924 	int error, off;
4925 
4926 	off = sc->rx_npoll_off;
4927 	error = sysctl_handle_int(oidp, &off, 0, req);
4928 	if (error || req->newptr == NULL)
4929 		return error;
4930 	if (off < 0)
4931 		return EINVAL;
4932 
4933 	ifnet_serialize_all(ifp);
4934 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4935 		error = EINVAL;
4936 	} else {
4937 		error = 0;
4938 		sc->rx_npoll_off = off;
4939 	}
4940 	ifnet_deserialize_all(ifp);
4941 
4942 	return error;
4943 }
4944 
4945 static int
4946 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4947 {
4948 	struct ix_softc *sc = (void *)arg1;
4949 	struct ifnet *ifp = &sc->arpcom.ac_if;
4950 	int error, off;
4951 
4952 	off = sc->tx_npoll_off;
4953 	error = sysctl_handle_int(oidp, &off, 0, req);
4954 	if (error || req->newptr == NULL)
4955 		return error;
4956 	if (off < 0)
4957 		return EINVAL;
4958 
4959 	ifnet_serialize_all(ifp);
4960 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4961 		error = EINVAL;
4962 	} else {
4963 		error = 0;
4964 		sc->tx_npoll_off = off;
4965 	}
4966 	ifnet_deserialize_all(ifp);
4967 
4968 	return error;
4969 }
4970 
4971 #endif /* IFPOLL_ENABLE */
4972