xref: /dflybsd-src/sys/dev/netif/emx/if_emx.c (revision d89cda310fac18bac18edce42fa346ac8f9c3e20)
1 /*
2  * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
3  *
4  * Copyright (c) 2001-2008, Intel Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  *  1. Redistributions of source code must retain the above copyright notice,
11  *     this list of conditions and the following disclaimer.
12  *
13  *  2. Redistributions in binary form must reproduce the above copyright
14  *     notice, this list of conditions and the following disclaimer in the
15  *     documentation and/or other materials provided with the distribution.
16  *
17  *  3. Neither the name of the Intel Corporation nor the names of its
18  *     contributors may be used to endorse or promote products derived from
19  *     this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  *
34  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
35  *
36  * This code is derived from software contributed to The DragonFly Project
37  * by Matthew Dillon <dillon@backplane.com>
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  *
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in
47  *    the documentation and/or other materials provided with the
48  *    distribution.
49  * 3. Neither the name of The DragonFly Project nor the names of its
50  *    contributors may be used to endorse or promote products derived
51  *    from this software without specific, prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
55  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
56  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
57  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
58  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
59  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
63  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 #include "opt_ifpoll.h"
68 #include "opt_emx.h"
69 
70 #include <sys/param.h>
71 #include <sys/bus.h>
72 #include <sys/endian.h>
73 #include <sys/interrupt.h>
74 #include <sys/kernel.h>
75 #include <sys/ktr.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/proc.h>
79 #include <sys/rman.h>
80 #include <sys/serialize.h>
81 #include <sys/serialize2.h>
82 #include <sys/socket.h>
83 #include <sys/sockio.h>
84 #include <sys/sysctl.h>
85 #include <sys/systm.h>
86 
87 #include <net/bpf.h>
88 #include <net/ethernet.h>
89 #include <net/if.h>
90 #include <net/if_arp.h>
91 #include <net/if_dl.h>
92 #include <net/if_media.h>
93 #include <net/ifq_var.h>
94 #include <net/toeplitz.h>
95 #include <net/toeplitz2.h>
96 #include <net/vlan/if_vlan_var.h>
97 #include <net/vlan/if_vlan_ether.h>
98 #include <net/if_poll.h>
99 
100 #include <netinet/in_systm.h>
101 #include <netinet/in.h>
102 #include <netinet/ip.h>
103 #include <netinet/tcp.h>
104 #include <netinet/udp.h>
105 
106 #include <bus/pci/pcivar.h>
107 #include <bus/pci/pcireg.h>
108 
109 #include <dev/netif/ig_hal/e1000_api.h>
110 #include <dev/netif/ig_hal/e1000_82571.h>
111 #include <dev/netif/emx/if_emx.h>
112 
113 #ifdef EMX_RSS_DEBUG
114 #define EMX_RSS_DPRINTF(sc, lvl, fmt, ...) \
115 do { \
116 	if (sc->rss_debug >= lvl) \
117 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
118 } while (0)
119 #else	/* !EMX_RSS_DEBUG */
120 #define EMX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
121 #endif	/* EMX_RSS_DEBUG */
122 
123 #define EMX_TX_SERIALIZE	1
124 #define EMX_RX_SERIALIZE	2
125 
126 #define EMX_NAME	"Intel(R) PRO/1000 "
127 
128 #define EMX_DEVICE(id)	\
129 	{ EMX_VENDOR_ID, E1000_DEV_ID_##id, EMX_NAME #id }
130 #define EMX_DEVICE_NULL	{ 0, 0, NULL }
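/*
 * For example, EMX_DEVICE(82574L) expands to
 * { EMX_VENDOR_ID, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 " "82574L" },
 * i.e. the device id token is pasted onto E1000_DEV_ID_ and the
 * stringized id is concatenated onto EMX_NAME for the description.
 */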
131 
132 static const struct emx_device {
133 	uint16_t	vid;
134 	uint16_t	did;
135 	const char	*desc;
136 } emx_devices[] = {
137 	EMX_DEVICE(82571EB_COPPER),
138 	EMX_DEVICE(82571EB_FIBER),
139 	EMX_DEVICE(82571EB_SERDES),
140 	EMX_DEVICE(82571EB_SERDES_DUAL),
141 	EMX_DEVICE(82571EB_SERDES_QUAD),
142 	EMX_DEVICE(82571EB_QUAD_COPPER),
143 	EMX_DEVICE(82571EB_QUAD_COPPER_BP),
144 	EMX_DEVICE(82571EB_QUAD_COPPER_LP),
145 	EMX_DEVICE(82571EB_QUAD_FIBER),
146 	EMX_DEVICE(82571PT_QUAD_COPPER),
147 
148 	EMX_DEVICE(82572EI_COPPER),
149 	EMX_DEVICE(82572EI_FIBER),
150 	EMX_DEVICE(82572EI_SERDES),
151 	EMX_DEVICE(82572EI),
152 
153 	EMX_DEVICE(82573E),
154 	EMX_DEVICE(82573E_IAMT),
155 	EMX_DEVICE(82573L),
156 
157 	EMX_DEVICE(80003ES2LAN_COPPER_SPT),
158 	EMX_DEVICE(80003ES2LAN_SERDES_SPT),
159 	EMX_DEVICE(80003ES2LAN_COPPER_DPT),
160 	EMX_DEVICE(80003ES2LAN_SERDES_DPT),
161 
162 	EMX_DEVICE(82574L),
163 	EMX_DEVICE(82574LA),
164 
165 	/* required last entry */
166 	EMX_DEVICE_NULL
167 };
168 
169 static int	emx_probe(device_t);
170 static int	emx_attach(device_t);
171 static int	emx_detach(device_t);
172 static int	emx_shutdown(device_t);
173 static int	emx_suspend(device_t);
174 static int	emx_resume(device_t);
175 
176 static void	emx_init(void *);
177 static void	emx_stop(struct emx_softc *);
178 static int	emx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
179 static void	emx_start(struct ifnet *);
180 #ifdef IFPOLL_ENABLE
181 static void	emx_npoll(struct ifnet *, struct ifpoll_info *);
182 static void	emx_npoll_status(struct ifnet *);
183 static void	emx_npoll_tx(struct ifnet *, void *, int);
184 static void	emx_npoll_rx(struct ifnet *, void *, int);
185 #endif
186 static void	emx_watchdog(struct ifnet *);
187 static void	emx_media_status(struct ifnet *, struct ifmediareq *);
188 static int	emx_media_change(struct ifnet *);
189 static void	emx_timer(void *);
190 static void	emx_serialize(struct ifnet *, enum ifnet_serialize);
191 static void	emx_deserialize(struct ifnet *, enum ifnet_serialize);
192 static int	emx_tryserialize(struct ifnet *, enum ifnet_serialize);
193 #ifdef INVARIANTS
194 static void	emx_serialize_assert(struct ifnet *, enum ifnet_serialize,
195 		    boolean_t);
196 #endif
197 
198 static void	emx_intr(void *);
199 static void	emx_intr_mask(void *);
200 static void	emx_intr_body(struct emx_softc *, boolean_t);
201 static void	emx_rxeof(struct emx_softc *, int, int);
202 static void	emx_txeof(struct emx_softc *);
203 static void	emx_tx_collect(struct emx_softc *);
204 static void	emx_tx_purge(struct emx_softc *);
205 static void	emx_enable_intr(struct emx_softc *);
206 static void	emx_disable_intr(struct emx_softc *);
207 
208 static int	emx_dma_alloc(struct emx_softc *);
209 static void	emx_dma_free(struct emx_softc *);
210 static void	emx_init_tx_ring(struct emx_softc *);
211 static int	emx_init_rx_ring(struct emx_softc *, struct emx_rxdata *);
212 static void	emx_free_rx_ring(struct emx_softc *, struct emx_rxdata *);
213 static int	emx_create_tx_ring(struct emx_softc *);
214 static int	emx_create_rx_ring(struct emx_softc *, struct emx_rxdata *);
215 static void	emx_destroy_tx_ring(struct emx_softc *, int);
216 static void	emx_destroy_rx_ring(struct emx_softc *,
217 		    struct emx_rxdata *, int);
218 static int	emx_newbuf(struct emx_softc *, struct emx_rxdata *, int, int);
219 static int	emx_encap(struct emx_softc *, struct mbuf **);
220 static int	emx_txcsum(struct emx_softc *, struct mbuf *,
221 		    uint32_t *, uint32_t *);
222 static int	emx_tso_pullup(struct emx_softc *, struct mbuf **);
223 static int	emx_tso_setup(struct emx_softc *, struct mbuf *,
224 		    uint32_t *, uint32_t *);
225 
226 static int 	emx_is_valid_eaddr(const uint8_t *);
227 static int	emx_reset(struct emx_softc *);
228 static void	emx_setup_ifp(struct emx_softc *);
229 static void	emx_init_tx_unit(struct emx_softc *);
230 static void	emx_init_rx_unit(struct emx_softc *);
231 static void	emx_update_stats(struct emx_softc *);
232 static void	emx_set_promisc(struct emx_softc *);
233 static void	emx_disable_promisc(struct emx_softc *);
234 static void	emx_set_multi(struct emx_softc *);
235 static void	emx_update_link_status(struct emx_softc *);
236 static void	emx_smartspeed(struct emx_softc *);
237 static void	emx_set_itr(struct emx_softc *, uint32_t);
238 static void	emx_disable_aspm(struct emx_softc *);
239 
240 static void	emx_print_debug_info(struct emx_softc *);
241 static void	emx_print_nvm_info(struct emx_softc *);
242 static void	emx_print_hw_stats(struct emx_softc *);
243 
244 static int	emx_sysctl_stats(SYSCTL_HANDLER_ARGS);
245 static int	emx_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
246 static int	emx_sysctl_int_throttle(SYSCTL_HANDLER_ARGS);
247 static int	emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS);
248 #ifdef IFPOLL_ENABLE
249 static int	emx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
250 static int	emx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
251 #endif
252 static void	emx_add_sysctl(struct emx_softc *);
253 
254 static void	emx_serialize_skipmain(struct emx_softc *);
255 static void	emx_deserialize_skipmain(struct emx_softc *);
256 
257 /* Management and WOL Support */
258 static void	emx_get_mgmt(struct emx_softc *);
259 static void	emx_rel_mgmt(struct emx_softc *);
260 static void	emx_get_hw_control(struct emx_softc *);
261 static void	emx_rel_hw_control(struct emx_softc *);
262 static void	emx_enable_wol(device_t);
263 
264 static device_method_t emx_methods[] = {
265 	/* Device interface */
266 	DEVMETHOD(device_probe,		emx_probe),
267 	DEVMETHOD(device_attach,	emx_attach),
268 	DEVMETHOD(device_detach,	emx_detach),
269 	DEVMETHOD(device_shutdown,	emx_shutdown),
270 	DEVMETHOD(device_suspend,	emx_suspend),
271 	DEVMETHOD(device_resume,	emx_resume),
272 	{ 0, 0 }
273 };
274 
275 static driver_t emx_driver = {
276 	"emx",
277 	emx_methods,
278 	sizeof(struct emx_softc),
279 };
280 
281 static devclass_t emx_devclass;
282 
283 DECLARE_DUMMY_MODULE(if_emx);
284 MODULE_DEPEND(emx, ig_hal, 1, 1, 1);
285 DRIVER_MODULE(if_emx, pci, emx_driver, emx_devclass, NULL, NULL);
286 
287 /*
288  * Tunables
289  */
290 static int	emx_int_throttle_ceil = EMX_DEFAULT_ITR;
291 static int	emx_rxd = EMX_DEFAULT_RXD;
292 static int	emx_txd = EMX_DEFAULT_TXD;
293 static int	emx_smart_pwr_down = 0;
294 static int	emx_rxr = 0;
295 
296 /* Controls whether promiscuous also shows bad packets */
297 static int	emx_debug_sbp = 0;
298 
299 static int	emx_82573_workaround = 1;
300 static int	emx_msi_enable = 1;
301 
302 TUNABLE_INT("hw.emx.int_throttle_ceil", &emx_int_throttle_ceil);
303 TUNABLE_INT("hw.emx.rxd", &emx_rxd);
304 TUNABLE_INT("hw.emx.rxr", &emx_rxr);
305 TUNABLE_INT("hw.emx.txd", &emx_txd);
306 TUNABLE_INT("hw.emx.smart_pwr_down", &emx_smart_pwr_down);
307 TUNABLE_INT("hw.emx.sbp", &emx_debug_sbp);
308 TUNABLE_INT("hw.emx.82573_workaround", &emx_82573_workaround);
309 TUNABLE_INT("hw.emx.msi.enable", &emx_msi_enable);
310 
311 /* Global used in WOL setup with multiport cards */
312 static int	emx_global_quad_port_a = 0;
313 
314 /* Set this to one to display debug statistics */
315 static int	emx_display_debug_stats = 0;
316 
317 #if !defined(KTR_IF_EMX)
318 #define KTR_IF_EMX	KTR_ALL
319 #endif
320 KTR_INFO_MASTER(if_emx);
321 KTR_INFO(KTR_IF_EMX, if_emx, intr_beg, 0, "intr begin");
322 KTR_INFO(KTR_IF_EMX, if_emx, intr_end, 1, "intr end");
323 KTR_INFO(KTR_IF_EMX, if_emx, pkt_receive, 4, "rx packet");
324 KTR_INFO(KTR_IF_EMX, if_emx, pkt_txqueue, 5, "tx packet");
325 KTR_INFO(KTR_IF_EMX, if_emx, pkt_txclean, 6, "tx clean");
326 #define logif(name)	KTR_LOG(if_emx_ ## name)
327 
328 static __inline void
329 emx_setup_rxdesc(emx_rxdesc_t *rxd, const struct emx_rxbuf *rxbuf)
330 {
331 	rxd->rxd_bufaddr = htole64(rxbuf->paddr);
332 	/* DD bit must be cleared */
333 	rxd->rxd_staterr = 0;
334 }
335 
336 static __inline void
337 emx_rxcsum(uint32_t staterr, struct mbuf *mp)
338 {
339 	/* If the Ignore Checksum Indication bit is set, report nothing */
340 	if (staterr & E1000_RXD_STAT_IXSM)
341 		return;
342 
343 	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
344 	    E1000_RXD_STAT_IPCS)
345 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
346 
347 	if ((staterr & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
348 	    E1000_RXD_STAT_TCPCS) {
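		/*
		 * The hardware verified the full TCP/UDP checksum,
		 * pseudo header included; csum_data = 0xffff together
		 * with CSUM_PSEUDO_HDR conveys exactly that to the
		 * stack.
		 */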
349 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
350 					   CSUM_PSEUDO_HDR |
351 					   CSUM_FRAG_NOT_CHECKED;
352 		mp->m_pkthdr.csum_data = htons(0xffff);
353 	}
354 }
355 
356 static __inline struct pktinfo *
357 emx_rssinfo(struct mbuf *m, struct pktinfo *pi,
358 	    uint32_t mrq, uint32_t hash, uint32_t staterr)
359 {
360 	switch (mrq & EMX_RXDMRQ_RSSTYPE_MASK) {
361 	case EMX_RXDMRQ_IPV4_TCP:
362 		pi->pi_netisr = NETISR_IP;
363 		pi->pi_flags = 0;
364 		pi->pi_l3proto = IPPROTO_TCP;
365 		break;
366 
367 	case EMX_RXDMRQ_IPV6_TCP:
368 		pi->pi_netisr = NETISR_IPV6;
369 		pi->pi_flags = 0;
370 		pi->pi_l3proto = IPPROTO_TCP;
371 		break;
372 
373 	case EMX_RXDMRQ_IPV4:
374 		if (staterr & E1000_RXD_STAT_IXSM)
375 			return NULL;
376 
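		/*
		 * NOTE (a hedged reading of the check below): the RSS
		 * type is plain IPV4, i.e. the hash did not cover L4
		 * ports, yet the hardware reports a valid TCP/UDP
		 * checksum, so the packet is treated as a checksummed
		 * UDP datagram here.
		 */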
377 		if ((staterr &
378 		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
379 		    E1000_RXD_STAT_TCPCS) {
380 			pi->pi_netisr = NETISR_IP;
381 			pi->pi_flags = 0;
382 			pi->pi_l3proto = IPPROTO_UDP;
383 			break;
384 		}
385 		/* FALL THROUGH */
386 	default:
387 		return NULL;
388 	}
389 
390 	m->m_flags |= M_HASH;
391 	m->m_pkthdr.hash = toeplitz_hash(hash);
392 	return pi;
393 }
394 
395 static int
396 emx_probe(device_t dev)
397 {
398 	const struct emx_device *d;
399 	uint16_t vid, did;
400 
401 	vid = pci_get_vendor(dev);
402 	did = pci_get_device(dev);
403 
404 	for (d = emx_devices; d->desc != NULL; ++d) {
405 		if (vid == d->vid && did == d->did) {
406 			device_set_desc(dev, d->desc);
407 			device_set_async_attach(dev, TRUE);
408 			return 0;
409 		}
410 	}
411 	return ENXIO;
412 }
413 
414 static int
415 emx_attach(device_t dev)
416 {
417 	struct emx_softc *sc = device_get_softc(dev);
418 	struct ifnet *ifp = &sc->arpcom.ac_if;
419 	int error = 0, i, throttle, msi_enable;
420 	u_int intr_flags;
421 	uint16_t eeprom_data, device_id, apme_mask;
422 	driver_intr_t *intr_func;
423 #ifdef IFPOLL_ENABLE
424 	int offset, offset_def;
425 #endif
426 
427 	lwkt_serialize_init(&sc->main_serialize);
428 	lwkt_serialize_init(&sc->tx_serialize);
429 	for (i = 0; i < EMX_NRX_RING; ++i)
430 		lwkt_serialize_init(&sc->rx_data[i].rx_serialize);
431 
432 	i = 0;
433 	sc->serializes[i++] = &sc->main_serialize;
434 	sc->serializes[i++] = &sc->tx_serialize;
435 	sc->serializes[i++] = &sc->rx_data[0].rx_serialize;
436 	sc->serializes[i++] = &sc->rx_data[1].rx_serialize;
437 	KKASSERT(i == EMX_NSERIALIZE);
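	/*
	 * NOTE (an explanatory sketch): this array is what the
	 * emx_serialize()/emx_deserialize() ifnet methods walk, so
	 * its order -- main, TX, then the RX rings -- and the
	 * EMX_NSERIALIZE count must stay in sync with the
	 * assignments above.
	 */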
438 
439 	callout_init_mp(&sc->timer);
440 
441 	sc->dev = sc->osdep.dev = dev;
442 
443 	/*
444 	 * Determine hardware and mac type
445 	 */
446 	sc->hw.vendor_id = pci_get_vendor(dev);
447 	sc->hw.device_id = pci_get_device(dev);
448 	sc->hw.revision_id = pci_get_revid(dev);
449 	sc->hw.subsystem_vendor_id = pci_get_subvendor(dev);
450 	sc->hw.subsystem_device_id = pci_get_subdevice(dev);
451 
452 	if (e1000_set_mac_type(&sc->hw))
453 		return ENXIO;
454 
455 	/*
456 	 * Pull up an extra 4 bytes into the first data segment, see:
457 	 * 82571/82572 specification update errata #7
458 	 *
459 	 * NOTE:
460 	 * 4 bytes instead of the 2 bytes mentioned in the errata are
461 	 * pulled, mainly to keep the rest of the data properly aligned.
462 	 */
463 	if (sc->hw.mac.type == e1000_82571 || sc->hw.mac.type == e1000_82572)
464 		sc->flags |= EMX_FLAG_TSO_PULLEX;
465 
466 	/* Enable bus mastering */
467 	pci_enable_busmaster(dev);
468 
469 	/*
470 	 * Allocate IO memory
471 	 */
472 	sc->memory_rid = EMX_BAR_MEM;
473 	sc->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474 					    &sc->memory_rid, RF_ACTIVE);
475 	if (sc->memory == NULL) {
476 		device_printf(dev, "Unable to allocate bus resource: memory\n");
477 		error = ENXIO;
478 		goto fail;
479 	}
480 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->memory);
481 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->memory);
482 
483 	/* XXX This is quite goofy; it is not actually used */
484 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
485 
486 	/*
487 	 * Don't enable MSI-X on 82574, see:
488 	 * 82574 specification update errata #15
489 	 *
490 	 * Don't enable MSI on 82571/82572, see:
491 	 * 82571/82572 specification update errata #63
492 	 */
493 	msi_enable = emx_msi_enable;
494 	if (msi_enable &&
495 	    (sc->hw.mac.type == e1000_82571 ||
496 	     sc->hw.mac.type == e1000_82572))
497 		msi_enable = 0;
498 
499 	/*
500 	 * Allocate interrupt
501 	 */
502 	sc->intr_type = pci_alloc_1intr(dev, msi_enable,
503 	    &sc->intr_rid, &intr_flags);
504 
505 	if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
506 		int unshared;
507 
508 		unshared = device_getenv_int(dev, "irq.unshared", 0);
509 		if (!unshared) {
510 			sc->flags |= EMX_FLAG_SHARED_INTR;
511 			if (bootverbose)
512 				device_printf(dev, "IRQ shared\n");
513 		} else {
514 			intr_flags &= ~RF_SHAREABLE;
515 			if (bootverbose)
516 				device_printf(dev, "IRQ unshared\n");
517 		}
518 	}
519 
520 	sc->intr_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->intr_rid,
521 	    intr_flags);
522 	if (sc->intr_res == NULL) {
523 		device_printf(dev, "Unable to allocate bus resource: "
524 		    "interrupt\n");
525 		error = ENXIO;
526 		goto fail;
527 	}
528 
529 	/* Save PCI command register for Shared Code */
530 	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
531 	sc->hw.back = &sc->osdep;
532 
533 	/* Do Shared Code initialization */
534 	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
535 		device_printf(dev, "Setup of Shared code failed\n");
536 		error = ENXIO;
537 		goto fail;
538 	}
539 	e1000_get_bus_info(&sc->hw);
540 
541 	sc->hw.mac.autoneg = EMX_DO_AUTO_NEG;
542 	sc->hw.phy.autoneg_wait_to_complete = FALSE;
543 	sc->hw.phy.autoneg_advertised = EMX_AUTONEG_ADV_DEFAULT;
544 
545 	/*
546 	 * Interrupt throttle rate
547 	 */
548 	throttle = device_getenv_int(dev, "int_throttle_ceil",
549 	    emx_int_throttle_ceil);
550 	if (throttle == 0) {
551 		sc->int_throttle_ceil = 0;
552 	} else {
553 		if (throttle < 0)
554 			throttle = EMX_DEFAULT_ITR;
555 
556 		/* Recalculate the tunable value to get the exact frequency. */
557 		throttle = 1000000000 / 256 / throttle;
558 
559 		/* Upper 16bits of ITR is reserved and should be zero */
560 		if (throttle & 0xffff0000)
561 			throttle = 1000000000 / 256 / EMX_DEFAULT_ITR;
562 
563 		sc->int_throttle_ceil = 1000000000 / 256 / throttle;
564 	}
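	/*
	 * A worked sketch of the conversion above: the ITR register
	 * counts in 256ns units, so a requested ceiling of 10000
	 * interrupts/s becomes 1000000000 / 256 / 10000 = 390 ticks,
	 * and converting back yields the ~10016 ints/s that will
	 * actually be programmed.
	 */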
565 
566 	e1000_init_script_state_82541(&sc->hw, TRUE);
567 	e1000_set_tbi_compatibility_82543(&sc->hw, TRUE);
568 
569 	/* Copper options */
570 	if (sc->hw.phy.media_type == e1000_media_type_copper) {
571 		sc->hw.phy.mdix = EMX_AUTO_ALL_MODES;
572 		sc->hw.phy.disable_polarity_correction = FALSE;
573 		sc->hw.phy.ms_type = EMX_MASTER_SLAVE;
574 	}
575 
576 	/* Set the frame limits assuming standard ethernet sized frames. */
577 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
578 	sc->min_frame_size = ETHER_MIN_LEN;
579 
580 	/* This controls when hardware reports transmit completion status. */
581 	sc->hw.mac.report_tx_early = 1;
582 
583 	/* Calculate # of RX rings */
584 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", emx_rxr);
585 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, EMX_NRX_RING);
586 
587 	/* Allocate the RX/TX rings' busdma(9) structures */
588 	error = emx_dma_alloc(sc);
589 	if (error)
590 		goto fail;
591 
592 	/* Allocate multicast array memory. */
593 	sc->mta = kmalloc(ETH_ADDR_LEN * EMX_MCAST_ADDR_MAX,
594 	    M_DEVBUF, M_WAITOK);
595 
596 	/* Indicate SOL/IDER usage */
597 	if (e1000_check_reset_block(&sc->hw)) {
598 		device_printf(dev,
599 		    "PHY reset is blocked due to SOL/IDER session.\n");
600 	}
601 
602 	/*
603 	 * Start from a known state; this is important for reading the
604 	 * NVM and the MAC address afterwards.
605 	 */
606 	e1000_reset_hw(&sc->hw);
607 
608 	/* Make sure we have a good EEPROM before we read from it */
609 	if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
610 		/*
611 		 * Some PCI-E parts fail the first check due to
612 		 * the link being in a sleep state; call it again and
613 		 * if it fails a second time it's a real issue.
614 		 */
615 		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
616 			device_printf(dev,
617 			    "The EEPROM Checksum Is Not Valid\n");
618 			error = EIO;
619 			goto fail;
620 		}
621 	}
622 
623 	/* Copy the permanent MAC address out of the EEPROM */
624 	if (e1000_read_mac_addr(&sc->hw) < 0) {
625 		device_printf(dev, "EEPROM read error while reading MAC"
626 		    " address\n");
627 		error = EIO;
628 		goto fail;
629 	}
630 	if (!emx_is_valid_eaddr(sc->hw.mac.addr)) {
631 		device_printf(dev, "Invalid MAC address\n");
632 		error = EIO;
633 		goto fail;
634 	}
635 
636 	/* Determine if we have to control management hardware */
637 	if (e1000_enable_mng_pass_thru(&sc->hw))
638 		sc->flags |= EMX_FLAG_HAS_MGMT;
639 
640 	/*
641 	 * Setup Wake-on-LAN
642 	 */
643 	apme_mask = EMX_EEPROM_APME;
644 	eeprom_data = 0;
645 	switch (sc->hw.mac.type) {
646 	case e1000_82573:
647 		sc->flags |= EMX_FLAG_HAS_AMT;
648 		/* FALL THROUGH */
649 
650 	case e1000_82571:
651 	case e1000_82572:
652 	case e1000_80003es2lan:
653 		if (sc->hw.bus.func == 1) {
654 			e1000_read_nvm(&sc->hw,
655 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
656 		} else {
657 			e1000_read_nvm(&sc->hw,
658 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
659 		}
660 		break;
661 
662 	default:
663 		e1000_read_nvm(&sc->hw,
664 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
665 		break;
666 	}
667 	if (eeprom_data & apme_mask)
668 		sc->wol = E1000_WUFC_MAG | E1000_WUFC_MC;
669 
670 	/*
671 	 * We have the EEPROM settings; now apply the special cases
672 	 * where the EEPROM may be wrong or the board won't support
673 	 * Wake On LAN on a particular port.
674 	 */
675 	device_id = pci_get_device(dev);
676 	switch (device_id) {
677 	case E1000_DEV_ID_82571EB_FIBER:
678 		/*
679 		 * Wake events only supported on port A for dual fiber
680 		 * regardless of eeprom setting
681 		 */
682 		if (E1000_READ_REG(&sc->hw, E1000_STATUS) &
683 		    E1000_STATUS_FUNC_1)
684 			sc->wol = 0;
685 		break;
686 
687 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
688 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
689 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
690 		/* If this is a quad-port adapter, disable WoL on all but port A */
691 		if (emx_global_quad_port_a != 0)
692 			sc->wol = 0;
693 		/* Reset for multiple quad port adapters */
694 		if (++emx_global_quad_port_a == 4)
695 			emx_global_quad_port_a = 0;
696 		break;
697 	}
698 
699 	/* XXX disable wol */
700 	sc->wol = 0;
701 
702 #ifdef IFPOLL_ENABLE
703 	/*
704 	 * NPOLLING RX CPU offset
705 	 */
706 	if (sc->rx_ring_cnt == ncpus2) {
707 		offset = 0;
708 	} else {
709 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
710 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
711 		if (offset >= ncpus2 ||
712 		    offset % sc->rx_ring_cnt != 0) {
713 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
714 			    offset, offset_def);
715 			offset = offset_def;
716 		}
717 	}
718 	sc->rx_npoll_off = offset;
719 
720 	/*
721 	 * NPOLLING TX CPU offset
722 	 */
723 	offset_def = sc->rx_npoll_off;
724 	offset = device_getenv_int(dev, "npoll.txoff", offset_def);
725 	if (offset >= ncpus2) {
726 		device_printf(dev, "invalid npoll.txoff %d, use %d\n",
727 		    offset, offset_def);
728 		offset = offset_def;
729 	}
730 	sc->tx_npoll_off = offset;
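	/*
	 * Example of the offset math above (a sketch): with
	 * ncpus2 == 4, two RX rings and device unit 1, the default
	 * RX offset is (2 * 1) % 4 = 2, i.e. RX polling starts at
	 * CPU2, and the TX offset then defaults to the same value.
	 */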
731 #endif
732 
733 	/* Setup OS specific network interface */
734 	emx_setup_ifp(sc);
735 
736 	/* Add sysctl tree; this must come after emx_setup_ifp() */
737 	emx_add_sysctl(sc);
738 
739 	/* Reset the hardware */
740 	error = emx_reset(sc);
741 	if (error) {
742 		device_printf(dev, "Unable to reset the hardware\n");
743 		goto fail;
744 	}
745 
746 	/* Initialize statistics */
747 	emx_update_stats(sc);
748 
749 	sc->hw.mac.get_link_status = 1;
750 	emx_update_link_status(sc);
751 
752 	sc->spare_tx_desc = EMX_TX_SPARE;
753 
754 	/*
755 	 * Keep following relationship between spare_tx_desc, oact_tx_desc
756 	 * and tx_int_nsegs:
757 	 * (spare_tx_desc + EMX_TX_RESERVED) <=
758 	 * oact_tx_desc <= EMX_TX_OACTIVE_MAX <= tx_int_nsegs
759 	 */
760 	sc->oact_tx_desc = sc->num_tx_desc / 8;
761 	if (sc->oact_tx_desc > EMX_TX_OACTIVE_MAX)
762 		sc->oact_tx_desc = EMX_TX_OACTIVE_MAX;
763 	if (sc->oact_tx_desc < sc->spare_tx_desc + EMX_TX_RESERVED)
764 		sc->oact_tx_desc = sc->spare_tx_desc + EMX_TX_RESERVED;
765 
766 	sc->tx_int_nsegs = sc->num_tx_desc / 16;
767 	if (sc->tx_int_nsegs < sc->oact_tx_desc)
768 		sc->tx_int_nsegs = sc->oact_tx_desc;
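	/*
	 * Worked sketch, assuming 512 TX descriptors: oact_tx_desc
	 * starts at 512 / 8 = 64 and is then clamped into the range
	 * dictated above, while tx_int_nsegs starts at 512 / 16 = 32
	 * and is raised to at least oact_tx_desc, preserving the
	 * documented relationship.
	 */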
769 
770 	/* Non-AMT based hardware can now take control from firmware */
771 	if ((sc->flags & (EMX_FLAG_HAS_MGMT | EMX_FLAG_HAS_AMT)) ==
772 	    EMX_FLAG_HAS_MGMT)
773 		emx_get_hw_control(sc);
774 
775 	/*
776 	 * Missing Interrupt Following ICR read:
777 	 *
778 	 * 82571/82572 specification update errata #76
779 	 * 82573 specification update errata #31
780 	 * 82574 specification update errata #12
781 	 */
782 	intr_func = emx_intr;
783 	if ((sc->flags & EMX_FLAG_SHARED_INTR) &&
784 	    (sc->hw.mac.type == e1000_82571 ||
785 	     sc->hw.mac.type == e1000_82572 ||
786 	     sc->hw.mac.type == e1000_82573 ||
787 	     sc->hw.mac.type == e1000_82574))
788 		intr_func = emx_intr_mask;
789 
790 	error = bus_setup_intr(dev, sc->intr_res, INTR_MPSAFE, intr_func, sc,
791 			       &sc->intr_tag, &sc->main_serialize);
792 	if (error) {
793 		device_printf(dev, "Failed to register interrupt handler");
794 		ether_ifdetach(&sc->arpcom.ac_if);
795 		goto fail;
796 	}
797 
798 	ifp->if_cpuid = rman_get_cpuid(sc->intr_res);
799 	KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus);
800 	return (0);
801 fail:
802 	emx_detach(dev);
803 	return (error);
804 }
805 
806 static int
807 emx_detach(device_t dev)
808 {
809 	struct emx_softc *sc = device_get_softc(dev);
810 
811 	if (device_is_attached(dev)) {
812 		struct ifnet *ifp = &sc->arpcom.ac_if;
813 
814 		ifnet_serialize_all(ifp);
815 
816 		emx_stop(sc);
817 
818 		e1000_phy_hw_reset(&sc->hw);
819 
820 		emx_rel_mgmt(sc);
821 		emx_rel_hw_control(sc);
822 
823 		if (sc->wol) {
824 			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
825 			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
826 			emx_enable_wol(dev);
827 		}
828 
829 		bus_teardown_intr(dev, sc->intr_res, sc->intr_tag);
830 
831 		ifnet_deserialize_all(ifp);
832 
833 		ether_ifdetach(ifp);
834 	} else if (sc->memory != NULL) {
835 		emx_rel_hw_control(sc);
836 	}
837 	bus_generic_detach(dev);
838 
839 	if (sc->intr_res != NULL) {
840 		bus_release_resource(dev, SYS_RES_IRQ, sc->intr_rid,
841 				     sc->intr_res);
842 	}
843 
844 	if (sc->intr_type == PCI_INTR_TYPE_MSI)
845 		pci_release_msi(dev);
846 
847 	if (sc->memory != NULL) {
848 		bus_release_resource(dev, SYS_RES_MEMORY, sc->memory_rid,
849 				     sc->memory);
850 	}
851 
852 	emx_dma_free(sc);
853 
854 	/* Free sysctl tree */
855 	if (sc->sysctl_tree != NULL)
856 		sysctl_ctx_free(&sc->sysctl_ctx);
857 
858 	if (sc->mta != NULL)
859 		kfree(sc->mta, M_DEVBUF);
860 
861 	return (0);
862 }
863 
864 static int
865 emx_shutdown(device_t dev)
866 {
867 	return emx_suspend(dev);
868 }
869 
870 static int
871 emx_suspend(device_t dev)
872 {
873 	struct emx_softc *sc = device_get_softc(dev);
874 	struct ifnet *ifp = &sc->arpcom.ac_if;
875 
876 	ifnet_serialize_all(ifp);
877 
878 	emx_stop(sc);
879 
880 	emx_rel_mgmt(sc);
881 	emx_rel_hw_control(sc);
882 
883 	if (sc->wol) {
884 		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
885 		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
886 		emx_enable_wol(dev);
887 	}
888 
889 	ifnet_deserialize_all(ifp);
890 
891 	return bus_generic_suspend(dev);
892 }
893 
894 static int
895 emx_resume(device_t dev)
896 {
897 	struct emx_softc *sc = device_get_softc(dev);
898 	struct ifnet *ifp = &sc->arpcom.ac_if;
899 
900 	ifnet_serialize_all(ifp);
901 
902 	emx_init(sc);
903 	emx_get_mgmt(sc);
904 	if_devstart(ifp);
905 
906 	ifnet_deserialize_all(ifp);
907 
908 	return bus_generic_resume(dev);
909 }
910 
911 static void
912 emx_start(struct ifnet *ifp)
913 {
914 	struct emx_softc *sc = ifp->if_softc;
915 	struct mbuf *m_head;
916 
917 	ASSERT_SERIALIZED(&sc->tx_serialize);
918 
919 	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
920 		return;
921 
922 	if (!sc->link_active) {
923 		ifq_purge(&ifp->if_snd);
924 		return;
925 	}
926 
927 	while (!ifq_is_empty(&ifp->if_snd)) {
928 		/* Do we at least have the minimal number of free TX descriptors? */
929 		if (EMX_IS_OACTIVE(sc)) {
930 			emx_tx_collect(sc);
931 			if (EMX_IS_OACTIVE(sc)) {
932 				ifp->if_flags |= IFF_OACTIVE;
933 				break;
934 			}
935 		}
936 
937 		logif(pkt_txqueue);
938 		m_head = ifq_dequeue(&ifp->if_snd, NULL);
939 		if (m_head == NULL)
940 			break;
941 
942 		if (emx_encap(sc, &m_head)) {
943 			ifp->if_oerrors++;
944 			emx_tx_collect(sc);
945 			continue;
946 		}
947 
948 		/* Send a copy of the frame to the BPF listener */
949 		ETHER_BPF_MTAP(ifp, m_head);
950 
951 		/* Set timeout in case hardware has problems transmitting. */
952 		ifp->if_timer = EMX_TX_TIMEOUT;
953 	}
954 }
955 
956 static int
957 emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
958 {
959 	struct emx_softc *sc = ifp->if_softc;
960 	struct ifreq *ifr = (struct ifreq *)data;
961 	uint16_t eeprom_data = 0;
962 	int max_frame_size, mask, reinit;
963 	int error = 0;
964 
965 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
966 
967 	switch (command) {
968 	case SIOCSIFMTU:
969 		switch (sc->hw.mac.type) {
970 		case e1000_82573:
971 			/*
972 			 * 82573 only supports jumbo frames
973 			 * if ASPM is disabled.
974 			 */
975 			e1000_read_nvm(&sc->hw, NVM_INIT_3GIO_3, 1,
976 				       &eeprom_data);
977 			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
978 				max_frame_size = ETHER_MAX_LEN;
979 				break;
980 			}
981 			/* FALL THROUGH */
982 
983 		/* Limit Jumbo Frame size */
984 		case e1000_82571:
985 		case e1000_82572:
986 		case e1000_82574:
987 		case e1000_80003es2lan:
988 			max_frame_size = 9234;
989 			break;
990 
991 		default:
992 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
993 			break;
994 		}
995 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
996 		    ETHER_CRC_LEN) {
997 			error = EINVAL;
998 			break;
999 		}
1000 
1001 		ifp->if_mtu = ifr->ifr_mtu;
1002 		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
1003 				     ETHER_CRC_LEN;
1004 
1005 		if (ifp->if_flags & IFF_RUNNING)
1006 			emx_init(sc);
1007 		break;
1008 
1009 	case SIOCSIFFLAGS:
1010 		if (ifp->if_flags & IFF_UP) {
1011 			if ((ifp->if_flags & IFF_RUNNING)) {
1012 				if ((ifp->if_flags ^ sc->if_flags) &
1013 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1014 					emx_disable_promisc(sc);
1015 					emx_set_promisc(sc);
1016 				}
1017 			} else {
1018 				emx_init(sc);
1019 			}
1020 		} else if (ifp->if_flags & IFF_RUNNING) {
1021 			emx_stop(sc);
1022 		}
1023 		sc->if_flags = ifp->if_flags;
1024 		break;
1025 
1026 	case SIOCADDMULTI:
1027 	case SIOCDELMULTI:
1028 		if (ifp->if_flags & IFF_RUNNING) {
1029 			emx_disable_intr(sc);
1030 			emx_set_multi(sc);
1031 #ifdef IFPOLL_ENABLE
1032 			if (!(ifp->if_flags & IFF_NPOLLING))
1033 #endif
1034 				emx_enable_intr(sc);
1035 		}
1036 		break;
1037 
1038 	case SIOCSIFMEDIA:
1039 		/* Check SOL/IDER usage */
1040 		if (e1000_check_reset_block(&sc->hw)) {
1041 			device_printf(sc->dev, "Media change is"
1042 			    " blocked due to SOL/IDER session.\n");
1043 			break;
1044 		}
1045 		/* FALL THROUGH */
1046 
1047 	case SIOCGIFMEDIA:
1048 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
1049 		break;
1050 
1051 	case SIOCSIFCAP:
1052 		reinit = 0;
1053 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1054 		if (mask & IFCAP_RXCSUM) {
1055 			ifp->if_capenable ^= IFCAP_RXCSUM;
1056 			reinit = 1;
1057 		}
1058 		if (mask & IFCAP_VLAN_HWTAGGING) {
1059 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1060 			reinit = 1;
1061 		}
1062 		if (mask & IFCAP_TXCSUM) {
1063 			ifp->if_capenable ^= IFCAP_TXCSUM;
1064 			if (ifp->if_capenable & IFCAP_TXCSUM)
1065 				ifp->if_hwassist |= EMX_CSUM_FEATURES;
1066 			else
1067 				ifp->if_hwassist &= ~EMX_CSUM_FEATURES;
1068 		}
1069 		if (mask & IFCAP_TSO) {
1070 			ifp->if_capenable ^= IFCAP_TSO;
1071 			if (ifp->if_capenable & IFCAP_TSO)
1072 				ifp->if_hwassist |= CSUM_TSO;
1073 			else
1074 				ifp->if_hwassist &= ~CSUM_TSO;
1075 		}
1076 		if (mask & IFCAP_RSS)
1077 			ifp->if_capenable ^= IFCAP_RSS;
1078 		if (reinit && (ifp->if_flags & IFF_RUNNING))
1079 			emx_init(sc);
1080 		break;
1081 
1082 	default:
1083 		error = ether_ioctl(ifp, command, data);
1084 		break;
1085 	}
1086 	return (error);
1087 }
1088 
1089 static void
1090 emx_watchdog(struct ifnet *ifp)
1091 {
1092 	struct emx_softc *sc = ifp->if_softc;
1093 
1094 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1095 
1096 	/*
1097 	 * The timer is set to 5 every time start queues a packet.
1098 	 * Then txeof keeps resetting it as long as it cleans at
1099 	 * least one descriptor.
1100 	 * Finally, anytime all descriptors are clean the timer is
1101 	 * set to 0.
1102 	 */
1103 
1104 	if (E1000_READ_REG(&sc->hw, E1000_TDT(0)) ==
1105 	    E1000_READ_REG(&sc->hw, E1000_TDH(0))) {
1106 		/*
1107 		 * If we reach here, all TX jobs are completed and
1108 		 * the TX engine should have been idled for some time.
1109 		 * We don't need to call if_devstart() here.
1110 		 */
1111 		ifp->if_flags &= ~IFF_OACTIVE;
1112 		ifp->if_timer = 0;
1113 		return;
1114 	}
1115 
1116 	/*
1117 	 * If we are in this routine because of pause frames, then
1118 	 * don't reset the hardware.
1119 	 */
1120 	if (E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_TXOFF) {
1121 		ifp->if_timer = EMX_TX_TIMEOUT;
1122 		return;
1123 	}
1124 
1125 	if (e1000_check_for_link(&sc->hw) == 0)
1126 		if_printf(ifp, "watchdog timeout -- resetting\n");
1127 
1128 	ifp->if_oerrors++;
1129 
1130 	emx_init(sc);
1131 
1132 	if (!ifq_is_empty(&ifp->if_snd))
1133 		if_devstart(ifp);
1134 }
1135 
1136 static void
1137 emx_init(void *xsc)
1138 {
1139 	struct emx_softc *sc = xsc;
1140 	struct ifnet *ifp = &sc->arpcom.ac_if;
1141 	device_t dev = sc->dev;
1142 	uint32_t pba;
1143 	int i;
1144 
1145 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1146 
1147 	emx_stop(sc);
1148 
1149 	/*
1150 	 * Packet Buffer Allocation (PBA)
1151 	 * Writing PBA sets the receive portion of the buffer;
1152 	 * the remainder is used for the transmit buffer.
1153 	 */
1154 	switch (sc->hw.mac.type) {
1155 	/* Total Packet Buffer on these is 48K */
1156 	case e1000_82571:
1157 	case e1000_82572:
1158 	case e1000_80003es2lan:
1159 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1160 		break;
1161 
1162 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1163 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1164 		break;
1165 
1166 	case e1000_82574:
1167 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1168 		break;
1169 
1170 	default:
1171 		/* Devices before 82547 had a Packet Buffer of 64K.   */
1172 		if (sc->max_frame_size > 8192)
1173 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1174 		else
1175 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1176 	}
1177 	E1000_WRITE_REG(&sc->hw, E1000_PBA, pba);
1178 
1179 	/* Get the latest MAC address; the user may have set a LAA */
1180 	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);
1181 
1182 	/* Put the address into the Receive Address Array */
1183 	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);
1184 
1185 	/*
1186 	 * With the 82571, RAR[0] may be overwritten when the other
1187 	 * port is reset, so we keep a duplicate in RAR[14] for that
1188 	 * eventuality; this assures the interface continues to
1189 	 * function.
1190 	 */
1191 	if (sc->hw.mac.type == e1000_82571) {
1192 		e1000_set_laa_state_82571(&sc->hw, TRUE);
1193 		e1000_rar_set(&sc->hw, sc->hw.mac.addr,
1194 		    E1000_RAR_ENTRIES - 1);
1195 	}
1196 
1197 	/* Initialize the hardware */
1198 	if (emx_reset(sc)) {
1199 		device_printf(dev, "Unable to reset the hardware\n");
1200 		/* XXX emx_stop()? */
1201 		return;
1202 	}
1203 	emx_update_link_status(sc);
1204 
1205 	/* Setup VLAN support, basic and offload if available */
1206 	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
1207 
1208 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1209 		uint32_t ctrl;
1210 
1211 		ctrl = E1000_READ_REG(&sc->hw, E1000_CTRL);
1212 		ctrl |= E1000_CTRL_VME;
1213 		E1000_WRITE_REG(&sc->hw, E1000_CTRL, ctrl);
1214 	}
1215 
1216 	/* Configure for OS presence */
1217 	emx_get_mgmt(sc);
1218 
1219 	/* Prepare transmit descriptors and buffers */
1220 	emx_init_tx_ring(sc);
1221 	emx_init_tx_unit(sc);
1222 
1223 	/* Setup Multicast table */
1224 	emx_set_multi(sc);
1225 
1226 	/* Prepare receive descriptors and buffers */
1227 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1228 		if (emx_init_rx_ring(sc, &sc->rx_data[i])) {
1229 			device_printf(dev,
1230 			    "Could not setup receive structures\n");
1231 			emx_stop(sc);
1232 			return;
1233 		}
1234 	}
1235 	emx_init_rx_unit(sc);
1236 
1237 	/* Don't lose promiscuous settings */
1238 	emx_set_promisc(sc);
1239 
1240 	ifp->if_flags |= IFF_RUNNING;
1241 	ifp->if_flags &= ~IFF_OACTIVE;
1242 
1243 	callout_reset(&sc->timer, hz, emx_timer, sc);
1244 	e1000_clear_hw_cntrs_base_generic(&sc->hw);
1245 
1246 	/* MSI/X configuration for 82574 */
1247 	if (sc->hw.mac.type == e1000_82574) {
1248 		int tmp;
1249 
1250 		tmp = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
1251 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1252 		E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, tmp);
1253 		/*
1254 		 * XXX MSIX
1255 		 * Set the IVAR - interrupt vector routing.
1256 		 * Each nibble represents a vector: the high bit is
1257 		 * the enable bit, the other 3 bits are the MSI-X
1258 		 * table entry.  We map RXQ0 to 0, TXQ0 to 1, and
1259 		 * Link (other) to 2, hence the magic number.
1260 		 */
1261 		E1000_WRITE_REG(&sc->hw, E1000_IVAR, 0x800A0908);
1262 	}
1263 
1264 #ifdef IFPOLL_ENABLE
1265 	/*
1266 	 * Only enable interrupts if we are not polling; make sure
1267 	 * they are off otherwise.
1268 	 */
1269 	if (ifp->if_flags & IFF_NPOLLING)
1270 		emx_disable_intr(sc);
1271 	else
1272 #endif /* IFPOLL_ENABLE */
1273 		emx_enable_intr(sc);
1274 
1275 	/* AMT based hardware can now take control from firmware */
1276 	if ((sc->flags & (EMX_FLAG_HAS_MGMT | EMX_FLAG_HAS_AMT)) ==
1277 	    (EMX_FLAG_HAS_MGMT | EMX_FLAG_HAS_AMT))
1278 		emx_get_hw_control(sc);
1279 
1280 	/* Don't reset the phy next time init gets called */
1281 	sc->hw.phy.reset_disable = TRUE;
1282 }
1283 
1284 static void
1285 emx_intr(void *xsc)
1286 {
1287 	emx_intr_body(xsc, TRUE);
1288 }
1289 
1290 static void
1291 emx_intr_body(struct emx_softc *sc, boolean_t chk_asserted)
1292 {
1293 	struct ifnet *ifp = &sc->arpcom.ac_if;
1294 	uint32_t reg_icr;
1295 
1296 	logif(intr_beg);
1297 	ASSERT_SERIALIZED(&sc->main_serialize);
1298 
1299 	reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
1300 
1301 	if (chk_asserted && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) {
1302 		logif(intr_end);
1303 		return;
1304 	}
1305 
1306 	/*
1307 	 * XXX: some laptops trigger several spurious interrupts
1308 	 * on emx(4) when in the resume cycle. The ICR register
1309 	 * reports an all-ones value in this case. Processing such
1310 	 * interrupts would lead to a freeze. I don't know why.
1311 	 */
1312 	if (reg_icr == 0xffffffff) {
1313 		logif(intr_end);
1314 		return;
1315 	}
1316 
1317 	if (ifp->if_flags & IFF_RUNNING) {
1318 		if (reg_icr &
1319 		    (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) {
1320 			int i;
1321 
1322 			for (i = 0; i < sc->rx_ring_cnt; ++i) {
1323 				lwkt_serialize_enter(
1324 				&sc->rx_data[i].rx_serialize);
1325 				emx_rxeof(sc, i, -1);
1326 				lwkt_serialize_exit(
1327 				&sc->rx_data[i].rx_serialize);
1328 			}
1329 		}
1330 		if (reg_icr & E1000_ICR_TXDW) {
1331 			lwkt_serialize_enter(&sc->tx_serialize);
1332 			emx_txeof(sc);
1333 			if (!ifq_is_empty(&ifp->if_snd))
1334 				if_devstart(ifp);
1335 			lwkt_serialize_exit(&sc->tx_serialize);
1336 		}
1337 	}
1338 
1339 	/* Link status change */
1340 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1341 		emx_serialize_skipmain(sc);
1342 
1343 		callout_stop(&sc->timer);
1344 		sc->hw.mac.get_link_status = 1;
1345 		emx_update_link_status(sc);
1346 
1347 		/* Deal with TX cruft when link lost */
1348 		emx_tx_purge(sc);
1349 
1350 		callout_reset(&sc->timer, hz, emx_timer, sc);
1351 
1352 		emx_deserialize_skipmain(sc);
1353 	}
1354 
1355 	if (reg_icr & E1000_ICR_RXO)
1356 		sc->rx_overruns++;
1357 
1358 	logif(intr_end);
1359 }
1360 
1361 static void
1362 emx_intr_mask(void *xsc)
1363 {
1364 	struct emx_softc *sc = xsc;
1365 
1366 	E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff);
1367 	/*
1368 	 * NOTE:
1369 	 * ICR.INT_ASSERTED bit will never be set if IMS is 0,
1370 	 * so don't check it.
1371 	 */
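	/*
	 * This mask/process/unmask sequence is only installed for
	 * shared interrupts on the 82571/2/3/4; see the "Missing
	 * Interrupt Following ICR read" errata cited in emx_attach().
	 */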
1372 	emx_intr_body(sc, FALSE);
1373 	E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK);
1374 }
1375 
1376 static void
1377 emx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1378 {
1379 	struct emx_softc *sc = ifp->if_softc;
1380 
1381 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1382 
1383 	emx_update_link_status(sc);
1384 
1385 	ifmr->ifm_status = IFM_AVALID;
1386 	ifmr->ifm_active = IFM_ETHER;
1387 
1388 	if (!sc->link_active)
1389 		return;
1390 
1391 	ifmr->ifm_status |= IFM_ACTIVE;
1392 
1393 	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
1394 	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
1395 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1396 	} else {
1397 		switch (sc->link_speed) {
1398 		case 10:
1399 			ifmr->ifm_active |= IFM_10_T;
1400 			break;
1401 		case 100:
1402 			ifmr->ifm_active |= IFM_100_TX;
1403 			break;
1404 
1405 		case 1000:
1406 			ifmr->ifm_active |= IFM_1000_T;
1407 			break;
1408 		}
1409 		if (sc->link_duplex == FULL_DUPLEX)
1410 			ifmr->ifm_active |= IFM_FDX;
1411 		else
1412 			ifmr->ifm_active |= IFM_HDX;
1413 	}
1414 }
1415 
1416 static int
1417 emx_media_change(struct ifnet *ifp)
1418 {
1419 	struct emx_softc *sc = ifp->if_softc;
1420 	struct ifmedia *ifm = &sc->media;
1421 
1422 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1423 
1424 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1425 		return (EINVAL);
1426 
1427 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1428 	case IFM_AUTO:
1429 		sc->hw.mac.autoneg = EMX_DO_AUTO_NEG;
1430 		sc->hw.phy.autoneg_advertised = EMX_AUTONEG_ADV_DEFAULT;
1431 		break;
1432 
1433 	case IFM_1000_LX:
1434 	case IFM_1000_SX:
1435 	case IFM_1000_T:
1436 		sc->hw.mac.autoneg = EMX_DO_AUTO_NEG;
1437 		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1438 		break;
1439 
1440 	case IFM_100_TX:
1441 		sc->hw.mac.autoneg = FALSE;
1442 		sc->hw.phy.autoneg_advertised = 0;
1443 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1444 			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1445 		else
1446 			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1447 		break;
1448 
1449 	case IFM_10_T:
1450 		sc->hw.mac.autoneg = FALSE;
1451 		sc->hw.phy.autoneg_advertised = 0;
1452 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1453 			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1454 		else
1455 			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1456 		break;
1457 
1458 	default:
1459 		if_printf(ifp, "Unsupported media type\n");
1460 		break;
1461 	}
1462 
1463 	/*
1464 	 * As the speed/duplex settings may have changed, we need to
1465 	 * reset the PHY.
1466 	 */
1467 	sc->hw.phy.reset_disable = FALSE;
1468 
1469 	emx_init(sc);
1470 
1471 	return (0);
1472 }
1473 
1474 static int
1475 emx_encap(struct emx_softc *sc, struct mbuf **m_headp)
1476 {
1477 	bus_dma_segment_t segs[EMX_MAX_SCATTER];
1478 	bus_dmamap_t map;
1479 	struct emx_txbuf *tx_buffer, *tx_buffer_mapped;
1480 	struct e1000_tx_desc *ctxd = NULL;
1481 	struct mbuf *m_head = *m_headp;
1482 	uint32_t txd_upper, txd_lower, cmd = 0;
1483 	int maxsegs, nsegs, i, j, first, last = 0, error;
1484 
1485 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1486 		error = emx_tso_pullup(sc, m_headp);
1487 		if (error)
1488 			return error;
1489 		m_head = *m_headp;
1490 	}
1491 
1492 	txd_upper = txd_lower = 0;
1493 
1494 	/*
1495 	 * Capture the first descriptor index; this descriptor
1496 	 * will have the index of the EOP, which is the only one
1497 	 * that now gets a DONE bit writeback.
1498 	 */
1499 	first = sc->next_avail_tx_desc;
1500 	tx_buffer = &sc->tx_buf[first];
1501 	tx_buffer_mapped = tx_buffer;
1502 	map = tx_buffer->map;
1503 
1504 	maxsegs = sc->num_tx_desc_avail - EMX_TX_RESERVED;
1505 	KASSERT(maxsegs >= sc->spare_tx_desc, ("not enough spare TX desc"));
1506 	if (maxsegs > EMX_MAX_SCATTER)
1507 		maxsegs = EMX_MAX_SCATTER;
1508 
1509 	error = bus_dmamap_load_mbuf_defrag(sc->txtag, map, m_headp,
1510 			segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1511 	if (error) {
1512 		m_freem(*m_headp);
1513 		*m_headp = NULL;
1514 		return error;
1515 	}
1516 	bus_dmamap_sync(sc->txtag, map, BUS_DMASYNC_PREWRITE);
1517 
1518 	m_head = *m_headp;
1519 	sc->tx_nsegs += nsegs;
1520 
1521 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1522 		/* TSO will consume one TX desc */
1523 		sc->tx_nsegs += emx_tso_setup(sc, m_head,
1524 		    &txd_upper, &txd_lower);
1525 	} else if (m_head->m_pkthdr.csum_flags & EMX_CSUM_FEATURES) {
1526 		/* TX csum offloading will consume one TX desc */
1527 		sc->tx_nsegs += emx_txcsum(sc, m_head, &txd_upper, &txd_lower);
1528 	}
1529 	i = sc->next_avail_tx_desc;
1530 
1531 	/* Set up our transmit descriptors */
1532 	for (j = 0; j < nsegs; j++) {
1533 		tx_buffer = &sc->tx_buf[i];
1534 		ctxd = &sc->tx_desc_base[i];
1535 
1536 		ctxd->buffer_addr = htole64(segs[j].ds_addr);
1537 		ctxd->lower.data = htole32(E1000_TXD_CMD_IFCS |
1538 					   txd_lower | segs[j].ds_len);
1539 		ctxd->upper.data = htole32(txd_upper);
1540 
1541 		last = i;
1542 		if (++i == sc->num_tx_desc)
1543 			i = 0;
1544 	}
1545 
1546 	sc->next_avail_tx_desc = i;
1547 
1548 	KKASSERT(sc->num_tx_desc_avail > nsegs);
1549 	sc->num_tx_desc_avail -= nsegs;
1550 
1551 	/* Handle VLAN tag */
1552 	if (m_head->m_flags & M_VLANTAG) {
1553 		/* Set the vlan id. */
1554 		ctxd->upper.fields.special =
1555 		    htole16(m_head->m_pkthdr.ether_vlantag);
1556 
1557 		/* Tell hardware to add tag */
1558 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1559 	}
1560 
1561 	tx_buffer->m_head = m_head;
1562 	tx_buffer_mapped->map = tx_buffer->map;
1563 	tx_buffer->map = map;
1564 
1565 	if (sc->tx_nsegs >= sc->tx_int_nsegs) {
1566 		sc->tx_nsegs = 0;
1567 
1568 		/*
1569 		 * Report Status (RS) is turned on
1570 		 * every tx_int_nsegs descriptors.
1571 		 */
1572 		cmd = E1000_TXD_CMD_RS;
1573 
1574 		/*
1575 		 * Keep track of the descriptor, which will
1576 		 * be written back by hardware.
1577 		 */
1578 		sc->tx_dd[sc->tx_dd_tail] = last;
1579 		EMX_INC_TXDD_IDX(sc->tx_dd_tail);
1580 		KKASSERT(sc->tx_dd_tail != sc->tx_dd_head);
1581 	}
1582 
1583 	/*
1584 	 * Last Descriptor of Packet needs End Of Packet (EOP)
1585 	 */
1586 	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | cmd);
1587 
1588 	/*
1589 	 * Advance the Transmit Descriptor Tail (TDT); this tells
1590 	 * the E1000 that this frame is available to transmit.
1591 	 */
1592 	E1000_WRITE_REG(&sc->hw, E1000_TDT(0), i);
1593 
1594 	return (0);
1595 }
1596 
1597 static void
1598 emx_set_promisc(struct emx_softc *sc)
1599 {
1600 	struct ifnet *ifp = &sc->arpcom.ac_if;
1601 	uint32_t reg_rctl;
1602 
1603 	reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
1604 
1605 	if (ifp->if_flags & IFF_PROMISC) {
1606 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1607 		/* Turn this on if you want to see bad packets */
1608 		if (emx_debug_sbp)
1609 			reg_rctl |= E1000_RCTL_SBP;
1610 		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
1611 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1612 		reg_rctl |= E1000_RCTL_MPE;
1613 		reg_rctl &= ~E1000_RCTL_UPE;
1614 		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
1615 	}
1616 }
1617 
1618 static void
1619 emx_disable_promisc(struct emx_softc *sc)
1620 {
1621 	uint32_t reg_rctl;
1622 
1623 	reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
1624 
1625 	reg_rctl &= ~E1000_RCTL_UPE;
1626 	reg_rctl &= ~E1000_RCTL_MPE;
1627 	reg_rctl &= ~E1000_RCTL_SBP;
1628 	E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
1629 }
1630 
1631 static void
1632 emx_set_multi(struct emx_softc *sc)
1633 {
1634 	struct ifnet *ifp = &sc->arpcom.ac_if;
1635 	struct ifmultiaddr *ifma;
1636 	uint32_t reg_rctl = 0;
1637 	uint8_t *mta;
1638 	int mcnt = 0;
1639 
1640 	mta = sc->mta;
1641 	bzero(mta, ETH_ADDR_LEN * EMX_MCAST_ADDR_MAX);
1642 
1643 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1644 		if (ifma->ifma_addr->sa_family != AF_LINK)
1645 			continue;
1646 
1647 		if (mcnt == EMX_MCAST_ADDR_MAX)
1648 			break;
1649 
1650 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1651 		      &mta[mcnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
1652 		mcnt++;
1653 	}
1654 
1655 	if (mcnt >= EMX_MCAST_ADDR_MAX) {
1656 		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
1657 		reg_rctl |= E1000_RCTL_MPE;
1658 		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
1659 	} else {
1660 		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
1661 	}
1662 }
1663 
1664 /*
1665  * This routine checks for link status and updates statistics.
1666  */
1667 static void
1668 emx_timer(void *xsc)
1669 {
1670 	struct emx_softc *sc = xsc;
1671 	struct ifnet *ifp = &sc->arpcom.ac_if;
1672 
1673 	lwkt_serialize_enter(&sc->main_serialize);
1674 
1675 	emx_update_link_status(sc);
1676 	emx_update_stats(sc);
1677 
1678 	/* Reset LAA into RAR[0] on 82571 */
1679 	if (e1000_get_laa_state_82571(&sc->hw) == TRUE)
1680 		e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);
1681 
1682 	if (emx_display_debug_stats && (ifp->if_flags & IFF_RUNNING))
1683 		emx_print_hw_stats(sc);
1684 
1685 	emx_smartspeed(sc);
1686 
1687 	callout_reset(&sc->timer, hz, emx_timer, sc);
1688 
1689 	lwkt_serialize_exit(&sc->main_serialize);
1690 }
1691 
1692 static void
1693 emx_update_link_status(struct emx_softc *sc)
1694 {
1695 	struct e1000_hw *hw = &sc->hw;
1696 	struct ifnet *ifp = &sc->arpcom.ac_if;
1697 	device_t dev = sc->dev;
1698 	uint32_t link_check = 0;
1699 
1700 	/* Get the cached link value or read phy for real */
1701 	switch (hw->phy.media_type) {
1702 	case e1000_media_type_copper:
1703 		if (hw->mac.get_link_status) {
1704 			/* Do the work to read phy */
1705 			e1000_check_for_link(hw);
1706 			link_check = !hw->mac.get_link_status;
1707 			if (link_check) /* ESB2 fix */
1708 				e1000_cfg_on_link_up(hw);
1709 		} else {
1710 			link_check = TRUE;
1711 		}
1712 		break;
1713 
1714 	case e1000_media_type_fiber:
1715 		e1000_check_for_link(hw);
1716 		link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU;
1717 		break;
1718 
1719 	case e1000_media_type_internal_serdes:
1720 		e1000_check_for_link(hw);
1721 		link_check = sc->hw.mac.serdes_has_link;
1722 		break;
1723 
1724 	case e1000_media_type_unknown:
1725 	default:
1726 		break;
1727 	}
1728 
1729 	/* Now check for a transition */
1730 	if (link_check && sc->link_active == 0) {
1731 		e1000_get_speed_and_duplex(hw, &sc->link_speed,
1732 		    &sc->link_duplex);
1733 
1734 		/*
1735 		 * Check if we should enable/disable SPEED_MODE bit on
1736 		 * 82571EB/82572EI
1737 		 */
1738 		if (sc->link_speed != SPEED_1000 &&
1739 		    (hw->mac.type == e1000_82571 ||
1740 		     hw->mac.type == e1000_82572)) {
1741 			int tarc0;
1742 
1743 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
1744 			tarc0 &= ~EMX_TARC_SPEED_MODE;
1745 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
1746 		}
1747 		if (bootverbose) {
1748 			device_printf(dev, "Link is up %d Mbps %s\n",
1749 			    sc->link_speed,
1750 			    ((sc->link_duplex == FULL_DUPLEX) ?
1751 			    "Full Duplex" : "Half Duplex"));
1752 		}
1753 		sc->link_active = 1;
1754 		sc->smartspeed = 0;
1755 		ifp->if_baudrate = sc->link_speed * 1000000;
1756 		ifp->if_link_state = LINK_STATE_UP;
1757 		if_link_state_change(ifp);
1758 	} else if (!link_check && sc->link_active == 1) {
1759 		ifp->if_baudrate = sc->link_speed = 0;
1760 		sc->link_duplex = 0;
1761 		if (bootverbose)
1762 			device_printf(dev, "Link is Down\n");
1763 		sc->link_active = 0;
1764 #if 0
1765 		/* Link down, disable watchdog */
1766 		ifp->if_timer = 0;
1767 #endif
1768 		ifp->if_link_state = LINK_STATE_DOWN;
1769 		if_link_state_change(ifp);
1770 	}
1771 }
1772 
1773 static void
1774 emx_stop(struct emx_softc *sc)
1775 {
1776 	struct ifnet *ifp = &sc->arpcom.ac_if;
1777 	int i;
1778 
1779 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1780 
1781 	emx_disable_intr(sc);
1782 
1783 	callout_stop(&sc->timer);
1784 
1785 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1786 	ifp->if_timer = 0;
1787 
1788 	/*
1789 	 * Disable multiple receive queues.
1790 	 *
1791 	 * NOTE:
1792 	 * We should disable multiple receive queues before
1793 	 * resetting the hardware.
1794 	 */
1795 	E1000_WRITE_REG(&sc->hw, E1000_MRQC, 0);
1796 
1797 	e1000_reset_hw(&sc->hw);
1798 	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);
1799 
1800 	for (i = 0; i < sc->num_tx_desc; i++) {
1801 		struct emx_txbuf *tx_buffer = &sc->tx_buf[i];
1802 
1803 		if (tx_buffer->m_head != NULL) {
1804 			bus_dmamap_unload(sc->txtag, tx_buffer->map);
1805 			m_freem(tx_buffer->m_head);
1806 			tx_buffer->m_head = NULL;
1807 		}
1808 	}
1809 
1810 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1811 		emx_free_rx_ring(sc, &sc->rx_data[i]);
1812 
1813 	sc->csum_flags = 0;
1814 	sc->csum_lhlen = 0;
1815 	sc->csum_iphlen = 0;
1816 	sc->csum_thlen = 0;
1817 	sc->csum_mss = 0;
1818 	sc->csum_pktlen = 0;
1819 
1820 	sc->tx_dd_head = 0;
1821 	sc->tx_dd_tail = 0;
1822 	sc->tx_nsegs = 0;
1823 }
1824 
1825 static int
1826 emx_reset(struct emx_softc *sc)
1827 {
1828 	device_t dev = sc->dev;
1829 	uint16_t rx_buffer_size;
1830 
1831 	/* Set up smart power down as default off on newer adapters. */
1832 	if (!emx_smart_pwr_down &&
1833 	    (sc->hw.mac.type == e1000_82571 ||
1834 	     sc->hw.mac.type == e1000_82572)) {
1835 		uint16_t phy_tmp = 0;
1836 
1837 		/* Speed up time to link by disabling smart power down. */
1838 		e1000_read_phy_reg(&sc->hw,
1839 		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1840 		phy_tmp &= ~IGP02E1000_PM_SPD;
1841 		e1000_write_phy_reg(&sc->hw,
1842 		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1843 	}
1844 
1845 	/*
1846 	 * These parameters control the automatic generation (Tx) and
1847 	 * response (Rx) to Ethernet PAUSE frames.
1848 	 * - High water mark should allow for at least two frames to be
1849 	 *   received after sending an XOFF.
1850 	 * - Low water mark works best when it is very near the high water mark.
1851 	 *   This allows the receiver to restart by sending XON when it has
1852 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
1853 	 *   restart after one full frame is pulled from the buffer. There
1854 	 *   could be several smaller frames in the buffer and if so they will
1855 	 *   not trigger the XON until their total number reduces the buffer
1856 	 *   by 1500.
1857 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1858 	 */
1859 	rx_buffer_size = (E1000_READ_REG(&sc->hw, E1000_PBA) & 0xffff) << 10;
1860 
1861 	sc->hw.fc.high_water = rx_buffer_size -
1862 			       roundup2(sc->max_frame_size, 1024);
1863 	sc->hw.fc.low_water = sc->hw.fc.high_water - 1500;
1864 
1865 	if (sc->hw.mac.type == e1000_80003es2lan)
1866 		sc->hw.fc.pause_time = 0xFFFF;
1867 	else
1868 		sc->hw.fc.pause_time = EMX_FC_PAUSE_TIME;
1869 	sc->hw.fc.send_xon = TRUE;
1870 	sc->hw.fc.requested_mode = e1000_fc_full;
1871 
1872 	/* Issue a global reset */
1873 	e1000_reset_hw(&sc->hw);
1874 	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);
1875 	emx_disable_aspm(sc);
1876 
1877 	if (e1000_init_hw(&sc->hw) < 0) {
1878 		device_printf(dev, "Hardware Initialization Failed\n");
1879 		return (EIO);
1880 	}
1881 
1882 	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
1883 	e1000_get_phy_info(&sc->hw);
1884 	e1000_check_for_link(&sc->hw);
1885 
1886 	return (0);
1887 }
1888 
1889 static void
1890 emx_setup_ifp(struct emx_softc *sc)
1891 {
1892 	struct ifnet *ifp = &sc->arpcom.ac_if;
1893 
1894 	if_initname(ifp, device_get_name(sc->dev),
1895 		    device_get_unit(sc->dev));
1896 	ifp->if_softc = sc;
1897 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1898 	ifp->if_init =  emx_init;
1899 	ifp->if_ioctl = emx_ioctl;
1900 	ifp->if_start = emx_start;
1901 #ifdef IFPOLL_ENABLE
1902 	ifp->if_npoll = emx_npoll;
1903 #endif
1904 	ifp->if_watchdog = emx_watchdog;
1905 	ifp->if_serialize = emx_serialize;
1906 	ifp->if_deserialize = emx_deserialize;
1907 	ifp->if_tryserialize = emx_tryserialize;
1908 #ifdef INVARIANTS
1909 	ifp->if_serialize_assert = emx_serialize_assert;
1910 #endif
1911 	ifq_set_maxlen(&ifp->if_snd, sc->num_tx_desc - 1);
1912 	ifq_set_ready(&ifp->if_snd);
1913 
1914 	ether_ifattach(ifp, sc->hw.mac.addr, NULL);
1915 
1916 	ifp->if_capabilities = IFCAP_HWCSUM |
1917 			       IFCAP_VLAN_HWTAGGING |
1918 			       IFCAP_VLAN_MTU |
1919 			       IFCAP_TSO;
1920 	if (sc->rx_ring_cnt > 1)
1921 		ifp->if_capabilities |= IFCAP_RSS;
1922 	ifp->if_capenable = ifp->if_capabilities;
1923 	ifp->if_hwassist = EMX_CSUM_FEATURES | CSUM_TSO;
1924 
1925 	/*
1926 	 * Tell the upper layer(s) we support long frames.
1927 	 */
1928 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1929 
1930 	/*
1931 	 * Specify the media types supported by this sc and register
1932 	 * callbacks to update media and link information
1933 	 */
1934 	ifmedia_init(&sc->media, IFM_IMASK,
1935 		     emx_media_change, emx_media_status);
1936 	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
1937 	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
1938 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
1939 			    0, NULL);
1940 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
1941 	} else {
1942 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1943 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1944 			    0, NULL);
1945 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
1946 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1947 			    0, NULL);
1948 		if (sc->hw.phy.type != e1000_phy_ife) {
1949 			ifmedia_add(&sc->media,
1950 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1951 			ifmedia_add(&sc->media,
1952 				IFM_ETHER | IFM_1000_T, 0, NULL);
1953 		}
1954 	}
1955 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1956 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1957 }
1958 
1959 /*
1960  * Workaround for SmartSpeed on 82541 and 82547 controllers
1961  */
1962 static void
1963 emx_smartspeed(struct emx_softc *sc)
1964 {
1965 	uint16_t phy_tmp;
1966 
1967 	if (sc->link_active || sc->hw.phy.type != e1000_phy_igp ||
1968 	    sc->hw.mac.autoneg == 0 ||
1969 	    (sc->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
1970 		return;
1971 
1972 	if (sc->smartspeed == 0) {
1973 		/*
		 * If the Master/Slave config fault is asserted on two
		 * consecutive reads, we assume back-to-back faults.
1976 		 */
1977 		e1000_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
1978 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
1979 			return;
1980 		e1000_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
1981 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
1982 			e1000_read_phy_reg(&sc->hw,
1983 			    PHY_1000T_CTRL, &phy_tmp);
1984 			if (phy_tmp & CR_1000T_MS_ENABLE) {
1985 				phy_tmp &= ~CR_1000T_MS_ENABLE;
1986 				e1000_write_phy_reg(&sc->hw,
1987 				    PHY_1000T_CTRL, phy_tmp);
1988 				sc->smartspeed++;
1989 				if (sc->hw.mac.autoneg &&
1990 				    !e1000_phy_setup_autoneg(&sc->hw) &&
1991 				    !e1000_read_phy_reg(&sc->hw,
1992 				     PHY_CONTROL, &phy_tmp)) {
1993 					phy_tmp |= MII_CR_AUTO_NEG_EN |
1994 						   MII_CR_RESTART_AUTO_NEG;
1995 					e1000_write_phy_reg(&sc->hw,
1996 					    PHY_CONTROL, phy_tmp);
1997 				}
1998 			}
1999 		}
2000 		return;
2001 	} else if (sc->smartspeed == EMX_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using a 2/3-pair cable */
2003 		e1000_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2004 		phy_tmp |= CR_1000T_MS_ENABLE;
2005 		e1000_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2006 		if (sc->hw.mac.autoneg &&
2007 		    !e1000_phy_setup_autoneg(&sc->hw) &&
2008 		    !e1000_read_phy_reg(&sc->hw, PHY_CONTROL, &phy_tmp)) {
2009 			phy_tmp |= MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG;
2010 			e1000_write_phy_reg(&sc->hw, PHY_CONTROL, phy_tmp);
2011 		}
2012 	}
2013 
2014 	/* Restart process after EMX_SMARTSPEED_MAX iterations */
2015 	if (sc->smartspeed++ == EMX_SMARTSPEED_MAX)
2016 		sc->smartspeed = 0;
2017 }
2018 
2019 static int
2020 emx_create_tx_ring(struct emx_softc *sc)
2021 {
2022 	device_t dev = sc->dev;
2023 	struct emx_txbuf *tx_buffer;
2024 	int error, i, tsize, ntxd;
2025 
2026 	/*
	 * Validate the number of transmit descriptors.  It must not exceed
	 * the hardware maximum, and the ring size in bytes must be a
	 * multiple of EMX_DBA_ALIGN.
2029 	 */
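	/*
	 * For example, assuming EMX_DBA_ALIGN is 128 and each
	 * e1000_tx_desc is 16 bytes, ntxd must be a multiple of 8.
	 */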
2030 	ntxd = device_getenv_int(dev, "txd", emx_txd);
2031 	if ((ntxd * sizeof(struct e1000_tx_desc)) % EMX_DBA_ALIGN != 0 ||
2032 	    ntxd > EMX_MAX_TXD || ntxd < EMX_MIN_TXD) {
2033 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
2034 		    EMX_DEFAULT_TXD, ntxd);
2035 		sc->num_tx_desc = EMX_DEFAULT_TXD;
2036 	} else {
2037 		sc->num_tx_desc = ntxd;
2038 	}
2039 
2040 	/*
2041 	 * Allocate Transmit Descriptor ring
2042 	 */
2043 	tsize = roundup2(sc->num_tx_desc * sizeof(struct e1000_tx_desc),
2044 			 EMX_DBA_ALIGN);
2045 	sc->tx_desc_base = bus_dmamem_coherent_any(sc->parent_dtag,
2046 				EMX_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
2047 				&sc->tx_desc_dtag, &sc->tx_desc_dmap,
2048 				&sc->tx_desc_paddr);
2049 	if (sc->tx_desc_base == NULL) {
2050 		device_printf(dev, "Unable to allocate tx_desc memory\n");
2051 		return ENOMEM;
2052 	}
2053 
2054 	sc->tx_buf = kmalloc(sizeof(struct emx_txbuf) * sc->num_tx_desc,
2055 			     M_DEVBUF, M_WAITOK | M_ZERO);
2056 
2057 	/*
2058 	 * Create DMA tags for tx buffers
2059 	 */
2060 	error = bus_dma_tag_create(sc->parent_dtag, /* parent */
2061 			1, 0,			/* alignment, bounds */
2062 			BUS_SPACE_MAXADDR,	/* lowaddr */
2063 			BUS_SPACE_MAXADDR,	/* highaddr */
2064 			NULL, NULL,		/* filter, filterarg */
2065 			EMX_TSO_SIZE,		/* maxsize */
2066 			EMX_MAX_SCATTER,	/* nsegments */
2067 			EMX_MAX_SEGSIZE,	/* maxsegsize */
2068 			BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
2069 			BUS_DMA_ONEBPAGE,	/* flags */
2070 			&sc->txtag);
2071 	if (error) {
2072 		device_printf(dev, "Unable to allocate TX DMA tag\n");
2073 		kfree(sc->tx_buf, M_DEVBUF);
2074 		sc->tx_buf = NULL;
2075 		return error;
2076 	}
2077 
2078 	/*
2079 	 * Create DMA maps for tx buffers
2080 	 */
2081 	for (i = 0; i < sc->num_tx_desc; i++) {
2082 		tx_buffer = &sc->tx_buf[i];
2083 
2084 		error = bus_dmamap_create(sc->txtag,
2085 					  BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
2086 					  &tx_buffer->map);
2087 		if (error) {
2088 			device_printf(dev, "Unable to create TX DMA map\n");
2089 			emx_destroy_tx_ring(sc, i);
2090 			return error;
2091 		}
2092 	}
2093 	return (0);
2094 }
2095 
2096 static void
2097 emx_init_tx_ring(struct emx_softc *sc)
2098 {
2099 	/* Clear the old ring contents */
2100 	bzero(sc->tx_desc_base,
2101 	      sizeof(struct e1000_tx_desc) * sc->num_tx_desc);
2102 
2103 	/* Reset state */
2104 	sc->next_avail_tx_desc = 0;
2105 	sc->next_tx_to_clean = 0;
2106 	sc->num_tx_desc_avail = sc->num_tx_desc;
2107 }
2108 
2109 static void
2110 emx_init_tx_unit(struct emx_softc *sc)
2111 {
2112 	uint32_t tctl, tarc, tipg = 0;
2113 	uint64_t bus_addr;
2114 
2115 	/* Setup the Base and Length of the Tx Descriptor Ring */
2116 	bus_addr = sc->tx_desc_paddr;
2117 	E1000_WRITE_REG(&sc->hw, E1000_TDLEN(0),
2118 	    sc->num_tx_desc * sizeof(struct e1000_tx_desc));
2119 	E1000_WRITE_REG(&sc->hw, E1000_TDBAH(0),
2120 	    (uint32_t)(bus_addr >> 32));
2121 	E1000_WRITE_REG(&sc->hw, E1000_TDBAL(0),
2122 	    (uint32_t)bus_addr);
2123 	/* Setup the HW Tx Head and Tail descriptor pointers */
2124 	E1000_WRITE_REG(&sc->hw, E1000_TDT(0), 0);
2125 	E1000_WRITE_REG(&sc->hw, E1000_TDH(0), 0);
2126 
2127 	/* Set the default values for the Tx Inter Packet Gap timer */
2128 	switch (sc->hw.mac.type) {
2129 	case e1000_80003es2lan:
2130 		tipg = DEFAULT_82543_TIPG_IPGR1;
2131 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2132 		    E1000_TIPG_IPGR2_SHIFT;
2133 		break;
2134 
2135 	default:
2136 		if (sc->hw.phy.media_type == e1000_media_type_fiber ||
2137 		    sc->hw.phy.media_type == e1000_media_type_internal_serdes)
2138 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2139 		else
2140 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2141 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2142 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2143 		break;
2144 	}
2145 
2146 	E1000_WRITE_REG(&sc->hw, E1000_TIPG, tipg);
2147 
2148 	/* NOTE: 0 is not allowed for TIDV */
2149 	E1000_WRITE_REG(&sc->hw, E1000_TIDV, 1);
2150 	E1000_WRITE_REG(&sc->hw, E1000_TADV, 0);
2151 
2152 	if (sc->hw.mac.type == e1000_82571 ||
2153 	    sc->hw.mac.type == e1000_82572) {
2154 		tarc = E1000_READ_REG(&sc->hw, E1000_TARC(0));
2155 		tarc |= EMX_TARC_SPEED_MODE;
2156 		E1000_WRITE_REG(&sc->hw, E1000_TARC(0), tarc);
2157 	} else if (sc->hw.mac.type == e1000_80003es2lan) {
2158 		tarc = E1000_READ_REG(&sc->hw, E1000_TARC(0));
2159 		tarc |= 1;
2160 		E1000_WRITE_REG(&sc->hw, E1000_TARC(0), tarc);
2161 		tarc = E1000_READ_REG(&sc->hw, E1000_TARC(1));
2162 		tarc |= 1;
2163 		E1000_WRITE_REG(&sc->hw, E1000_TARC(1), tarc);
2164 	}
2165 
2166 	/* Program the Transmit Control Register */
2167 	tctl = E1000_READ_REG(&sc->hw, E1000_TCTL);
2168 	tctl &= ~E1000_TCTL_CT;
2169 	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2170 		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2171 	tctl |= E1000_TCTL_MULR;
2172 
2173 	/* This write will effectively turn on the transmit unit. */
2174 	E1000_WRITE_REG(&sc->hw, E1000_TCTL, tctl);
2175 }
2176 
2177 static void
2178 emx_destroy_tx_ring(struct emx_softc *sc, int ndesc)
2179 {
2180 	struct emx_txbuf *tx_buffer;
2181 	int i;
2182 
2183 	/* Free Transmit Descriptor ring */
2184 	if (sc->tx_desc_base) {
2185 		bus_dmamap_unload(sc->tx_desc_dtag, sc->tx_desc_dmap);
2186 		bus_dmamem_free(sc->tx_desc_dtag, sc->tx_desc_base,
2187 				sc->tx_desc_dmap);
2188 		bus_dma_tag_destroy(sc->tx_desc_dtag);
2189 
2190 		sc->tx_desc_base = NULL;
2191 	}
2192 
2193 	if (sc->tx_buf == NULL)
2194 		return;
2195 
2196 	for (i = 0; i < ndesc; i++) {
2197 		tx_buffer = &sc->tx_buf[i];
2198 
2199 		KKASSERT(tx_buffer->m_head == NULL);
2200 		bus_dmamap_destroy(sc->txtag, tx_buffer->map);
2201 	}
2202 	bus_dma_tag_destroy(sc->txtag);
2203 
2204 	kfree(sc->tx_buf, M_DEVBUF);
2205 	sc->tx_buf = NULL;
2206 }
2207 
2208 /*
2209  * The offload context needs to be set when we transfer the first
2210  * packet of a particular protocol (TCP/UDP).  This routine has been
2211  * enhanced to deal with inserted VLAN headers.
2212  *
2213  * If the new packet's ether header length, ip header length and
 * csum offloading type are the same as the previous packet's, we
 * should avoid allocating a new csum context descriptor, mainly to
 * take advantage of the pipeline effect of the TX data read request.
 *
 * This function returns the number of TX descriptors allocated for
2219  * csum context.
2220  */
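/*
 * For example, during a bulk TCP transfer every segment typically
 * carries the same 14-byte Ethernet header, 20-byte IP header and
 * CSUM_TCP flag, so only the first segment consumes a context
 * descriptor.
 */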
2221 static int
2222 emx_txcsum(struct emx_softc *sc, struct mbuf *mp,
2223 	   uint32_t *txd_upper, uint32_t *txd_lower)
2224 {
2225 	struct e1000_context_desc *TXD;
2226 	int curr_txd, ehdrlen, csum_flags;
2227 	uint32_t cmd, hdr_len, ip_hlen;
2228 
2229 	csum_flags = mp->m_pkthdr.csum_flags & EMX_CSUM_FEATURES;
2230 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2231 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2232 
2233 	if (sc->csum_lhlen == ehdrlen && sc->csum_iphlen == ip_hlen &&
2234 	    sc->csum_flags == csum_flags) {
2235 		/*
2236 		 * Same csum offload context as the previous packets;
2237 		 * just return.
2238 		 */
2239 		*txd_upper = sc->csum_txd_upper;
2240 		*txd_lower = sc->csum_txd_lower;
2241 		return 0;
2242 	}
2243 
2244 	/*
2245 	 * Setup a new csum offload context.
2246 	 */
2247 
2248 	curr_txd = sc->next_avail_tx_desc;
2249 	TXD = (struct e1000_context_desc *)&sc->tx_desc_base[curr_txd];
2250 
2251 	cmd = 0;
2252 
2253 	/* Setup of IP header checksum. */
2254 	if (csum_flags & CSUM_IP) {
2255 		/*
2256 		 * Start offset for header checksum calculation.
2257 		 * End offset for header checksum calculation.
2258 		 * Offset of place to put the checksum.
2259 		 */
2260 		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2261 		TXD->lower_setup.ip_fields.ipcse =
2262 		    htole16(ehdrlen + ip_hlen - 1);
2263 		TXD->lower_setup.ip_fields.ipcso =
2264 		    ehdrlen + offsetof(struct ip, ip_sum);
2265 		cmd |= E1000_TXD_CMD_IP;
2266 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2267 	}
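	/*
	 * For instance, for an untagged Ethernet frame with a 20-byte
	 * IPv4 header: ipcss = 14, ipcse = 33 and
	 * ipcso = 14 + offsetof(struct ip, ip_sum) = 24.
	 */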
2268 	hdr_len = ehdrlen + ip_hlen;
2269 
2270 	if (csum_flags & CSUM_TCP) {
2271 		/*
2272 		 * Start offset for payload checksum calculation.
2273 		 * End offset for payload checksum calculation.
2274 		 * Offset of place to put the checksum.
2275 		 */
2276 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
2277 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
2278 		TXD->upper_setup.tcp_fields.tucso =
2279 		    hdr_len + offsetof(struct tcphdr, th_sum);
2280 		cmd |= E1000_TXD_CMD_TCP;
2281 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2282 	} else if (csum_flags & CSUM_UDP) {
2283 		/*
2284 		 * Start offset for header checksum calculation.
2285 		 * End offset for header checksum calculation.
2286 		 * Offset of place to put the checksum.
2287 		 */
2288 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
2289 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
2290 		TXD->upper_setup.tcp_fields.tucso =
2291 		    hdr_len + offsetof(struct udphdr, uh_sum);
2292 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2293 	}
2294 
2295 	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2296 		     E1000_TXD_DTYP_D;		/* Data descr */
2297 
2298 	/* Save the information for this csum offloading context */
2299 	sc->csum_lhlen = ehdrlen;
2300 	sc->csum_iphlen = ip_hlen;
2301 	sc->csum_flags = csum_flags;
2302 	sc->csum_txd_upper = *txd_upper;
2303 	sc->csum_txd_lower = *txd_lower;
2304 
2305 	TXD->tcp_seg_setup.data = htole32(0);
2306 	TXD->cmd_and_length =
2307 	    htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);
2308 
2309 	if (++curr_txd == sc->num_tx_desc)
2310 		curr_txd = 0;
2311 
2312 	KKASSERT(sc->num_tx_desc_avail > 0);
2313 	sc->num_tx_desc_avail--;
2314 
2315 	sc->next_avail_tx_desc = curr_txd;
2316 	return 1;
2317 }
2318 
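/*
 * Reclaim completed TX buffers.  tx_dd[] is a small ring recording the
 * indices of descriptors that requested status write-back; once the
 * hardware marks such a descriptor done (DD), every descriptor up to
 * and including it can be cleaned.
 */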
2319 static void
2320 emx_txeof(struct emx_softc *sc)
2321 {
2322 	struct ifnet *ifp = &sc->arpcom.ac_if;
2323 	struct emx_txbuf *tx_buffer;
2324 	int first, num_avail;
2325 
2326 	if (sc->tx_dd_head == sc->tx_dd_tail)
2327 		return;
2328 
2329 	if (sc->num_tx_desc_avail == sc->num_tx_desc)
2330 		return;
2331 
2332 	num_avail = sc->num_tx_desc_avail;
2333 	first = sc->next_tx_to_clean;
2334 
2335 	while (sc->tx_dd_head != sc->tx_dd_tail) {
2336 		int dd_idx = sc->tx_dd[sc->tx_dd_head];
2337 		struct e1000_tx_desc *tx_desc;
2338 
2339 		tx_desc = &sc->tx_desc_base[dd_idx];
2340 		if (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2341 			EMX_INC_TXDD_IDX(sc->tx_dd_head);
2342 
2343 			if (++dd_idx == sc->num_tx_desc)
2344 				dd_idx = 0;
2345 
2346 			while (first != dd_idx) {
2347 				logif(pkt_txclean);
2348 
2349 				num_avail++;
2350 
2351 				tx_buffer = &sc->tx_buf[first];
2352 				if (tx_buffer->m_head) {
2353 					ifp->if_opackets++;
2354 					bus_dmamap_unload(sc->txtag,
2355 							  tx_buffer->map);
2356 					m_freem(tx_buffer->m_head);
2357 					tx_buffer->m_head = NULL;
2358 				}
2359 
2360 				if (++first == sc->num_tx_desc)
2361 					first = 0;
2362 			}
2363 		} else {
2364 			break;
2365 		}
2366 	}
2367 	sc->next_tx_to_clean = first;
2368 	sc->num_tx_desc_avail = num_avail;
2369 
2370 	if (sc->tx_dd_head == sc->tx_dd_tail) {
2371 		sc->tx_dd_head = 0;
2372 		sc->tx_dd_tail = 0;
2373 	}
2374 
2375 	if (!EMX_IS_OACTIVE(sc)) {
2376 		ifp->if_flags &= ~IFF_OACTIVE;
2377 
2378 		/* All clean, turn off the timer */
2379 		if (sc->num_tx_desc_avail == sc->num_tx_desc)
2380 			ifp->if_timer = 0;
2381 	}
2382 }
2383 
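/*
 * Like emx_txeof(), but reclaim based on the hardware head pointer
 * (TDH) instead of waiting for DD status write-backs; used e.g. by
 * emx_tx_purge() when the link has been lost.
 */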
2384 static void
2385 emx_tx_collect(struct emx_softc *sc)
2386 {
2387 	struct ifnet *ifp = &sc->arpcom.ac_if;
2388 	struct emx_txbuf *tx_buffer;
2389 	int tdh, first, num_avail, dd_idx = -1;
2390 
2391 	if (sc->num_tx_desc_avail == sc->num_tx_desc)
2392 		return;
2393 
2394 	tdh = E1000_READ_REG(&sc->hw, E1000_TDH(0));
2395 	if (tdh == sc->next_tx_to_clean)
2396 		return;
2397 
2398 	if (sc->tx_dd_head != sc->tx_dd_tail)
2399 		dd_idx = sc->tx_dd[sc->tx_dd_head];
2400 
2401 	num_avail = sc->num_tx_desc_avail;
2402 	first = sc->next_tx_to_clean;
2403 
2404 	while (first != tdh) {
2405 		logif(pkt_txclean);
2406 
2407 		num_avail++;
2408 
2409 		tx_buffer = &sc->tx_buf[first];
2410 		if (tx_buffer->m_head) {
2411 			ifp->if_opackets++;
2412 			bus_dmamap_unload(sc->txtag,
2413 					  tx_buffer->map);
2414 			m_freem(tx_buffer->m_head);
2415 			tx_buffer->m_head = NULL;
2416 		}
2417 
2418 		if (first == dd_idx) {
2419 			EMX_INC_TXDD_IDX(sc->tx_dd_head);
2420 			if (sc->tx_dd_head == sc->tx_dd_tail) {
2421 				sc->tx_dd_head = 0;
2422 				sc->tx_dd_tail = 0;
2423 				dd_idx = -1;
2424 			} else {
2425 				dd_idx = sc->tx_dd[sc->tx_dd_head];
2426 			}
2427 		}
2428 
2429 		if (++first == sc->num_tx_desc)
2430 			first = 0;
2431 	}
2432 	sc->next_tx_to_clean = first;
2433 	sc->num_tx_desc_avail = num_avail;
2434 
2435 	if (!EMX_IS_OACTIVE(sc)) {
2436 		ifp->if_flags &= ~IFF_OACTIVE;
2437 
2438 		/* All clean, turn off the timer */
2439 		if (sc->num_tx_desc_avail == sc->num_tx_desc)
2440 			ifp->if_timer = 0;
2441 	}
2442 }
2443 
2444 /*
 * When the link is lost there is sometimes work still pending in
 * the TX ring, which will result in a watchdog reset; rather than
 * allow that, do an attempted cleanup and then reinit here.  Note
 * that this has been seen mostly with fiber adapters.
2449  */
2450 static void
2451 emx_tx_purge(struct emx_softc *sc)
2452 {
2453 	struct ifnet *ifp = &sc->arpcom.ac_if;
2454 
2455 	if (!sc->link_active && ifp->if_timer) {
2456 		emx_tx_collect(sc);
2457 		if (ifp->if_timer) {
2458 			if_printf(ifp, "Link lost, TX pending, reinit\n");
2459 			ifp->if_timer = 0;
2460 			emx_init(sc);
2461 		}
2462 	}
2463 }
2464 
2465 static int
2466 emx_newbuf(struct emx_softc *sc, struct emx_rxdata *rdata, int i, int init)
2467 {
2468 	struct mbuf *m;
2469 	bus_dma_segment_t seg;
2470 	bus_dmamap_t map;
2471 	struct emx_rxbuf *rx_buffer;
2472 	int error, nseg;
2473 
2474 	m = m_getcl(init ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
2475 	if (m == NULL) {
2476 		if (init) {
2477 			if_printf(&sc->arpcom.ac_if,
2478 				  "Unable to allocate RX mbuf\n");
2479 		}
2480 		return (ENOBUFS);
2481 	}
2482 	m->m_len = m->m_pkthdr.len = MCLBYTES;
2483 
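	/*
	 * When the frame fits in one cluster, offset the data by
	 * ETHER_ALIGN (2 bytes) so the IP header following the 14-byte
	 * Ethernet header ends up 32-bit aligned.
	 */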
2484 	if (sc->max_frame_size <= MCLBYTES - ETHER_ALIGN)
2485 		m_adj(m, ETHER_ALIGN);
2486 
2487 	error = bus_dmamap_load_mbuf_segment(rdata->rxtag,
2488 			rdata->rx_sparemap, m,
2489 			&seg, 1, &nseg, BUS_DMA_NOWAIT);
2490 	if (error) {
2491 		m_freem(m);
2492 		if (init) {
2493 			if_printf(&sc->arpcom.ac_if,
2494 				  "Unable to load RX mbuf\n");
2495 		}
2496 		return (error);
2497 	}
2498 
2499 	rx_buffer = &rdata->rx_buf[i];
2500 	if (rx_buffer->m_head != NULL)
2501 		bus_dmamap_unload(rdata->rxtag, rx_buffer->map);
2502 
2503 	map = rx_buffer->map;
2504 	rx_buffer->map = rdata->rx_sparemap;
2505 	rdata->rx_sparemap = map;
2506 
2507 	rx_buffer->m_head = m;
2508 	rx_buffer->paddr = seg.ds_addr;
2509 
2510 	emx_setup_rxdesc(&rdata->rx_desc[i], rx_buffer);
2511 	return (0);
2512 }
2513 
2514 static int
2515 emx_create_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
2516 {
2517 	device_t dev = sc->dev;
2518 	struct emx_rxbuf *rx_buffer;
2519 	int i, error, rsize, nrxd;
2520 
2521 	/*
	 * Validate the number of receive descriptors.  It must not exceed
	 * the hardware maximum, and the ring size in bytes must be a
	 * multiple of EMX_DBA_ALIGN.
2524 	 */
2525 	nrxd = device_getenv_int(dev, "rxd", emx_rxd);
2526 	if ((nrxd * sizeof(emx_rxdesc_t)) % EMX_DBA_ALIGN != 0 ||
2527 	    nrxd > EMX_MAX_RXD || nrxd < EMX_MIN_RXD) {
2528 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
2529 		    EMX_DEFAULT_RXD, nrxd);
2530 		rdata->num_rx_desc = EMX_DEFAULT_RXD;
2531 	} else {
2532 		rdata->num_rx_desc = nrxd;
2533 	}
2534 
2535 	/*
2536 	 * Allocate Receive Descriptor ring
2537 	 */
2538 	rsize = roundup2(rdata->num_rx_desc * sizeof(emx_rxdesc_t),
2539 			 EMX_DBA_ALIGN);
2540 	rdata->rx_desc = bus_dmamem_coherent_any(sc->parent_dtag,
2541 				EMX_DBA_ALIGN, rsize, BUS_DMA_WAITOK,
2542 				&rdata->rx_desc_dtag, &rdata->rx_desc_dmap,
2543 				&rdata->rx_desc_paddr);
2544 	if (rdata->rx_desc == NULL) {
2545 		device_printf(dev, "Unable to allocate rx_desc memory\n");
2546 		return ENOMEM;
2547 	}
2548 
2549 	rdata->rx_buf = kmalloc(sizeof(struct emx_rxbuf) * rdata->num_rx_desc,
2550 				M_DEVBUF, M_WAITOK | M_ZERO);
2551 
2552 	/*
2553 	 * Create DMA tag for rx buffers
2554 	 */
2555 	error = bus_dma_tag_create(sc->parent_dtag, /* parent */
2556 			1, 0,			/* alignment, bounds */
2557 			BUS_SPACE_MAXADDR,	/* lowaddr */
2558 			BUS_SPACE_MAXADDR,	/* highaddr */
2559 			NULL, NULL,		/* filter, filterarg */
2560 			MCLBYTES,		/* maxsize */
2561 			1,			/* nsegments */
2562 			MCLBYTES,		/* maxsegsize */
2563 			BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2564 			&rdata->rxtag);
2565 	if (error) {
2566 		device_printf(dev, "Unable to allocate RX DMA tag\n");
2567 		kfree(rdata->rx_buf, M_DEVBUF);
2568 		rdata->rx_buf = NULL;
2569 		return error;
2570 	}
2571 
2572 	/*
2573 	 * Create spare DMA map for rx buffers
2574 	 */
2575 	error = bus_dmamap_create(rdata->rxtag, BUS_DMA_WAITOK,
2576 				  &rdata->rx_sparemap);
2577 	if (error) {
2578 		device_printf(dev, "Unable to create spare RX DMA map\n");
2579 		bus_dma_tag_destroy(rdata->rxtag);
2580 		kfree(rdata->rx_buf, M_DEVBUF);
2581 		rdata->rx_buf = NULL;
2582 		return error;
2583 	}
2584 
2585 	/*
2586 	 * Create DMA maps for rx buffers
2587 	 */
2588 	for (i = 0; i < rdata->num_rx_desc; i++) {
2589 		rx_buffer = &rdata->rx_buf[i];
2590 
2591 		error = bus_dmamap_create(rdata->rxtag, BUS_DMA_WAITOK,
2592 					  &rx_buffer->map);
2593 		if (error) {
2594 			device_printf(dev, "Unable to create RX DMA map\n");
2595 			emx_destroy_rx_ring(sc, rdata, i);
2596 			return error;
2597 		}
2598 	}
2599 	return (0);
2600 }
2601 
2602 static void
2603 emx_free_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
2604 {
2605 	int i;
2606 
2607 	for (i = 0; i < rdata->num_rx_desc; i++) {
2608 		struct emx_rxbuf *rx_buffer = &rdata->rx_buf[i];
2609 
2610 		if (rx_buffer->m_head != NULL) {
2611 			bus_dmamap_unload(rdata->rxtag, rx_buffer->map);
2612 			m_freem(rx_buffer->m_head);
2613 			rx_buffer->m_head = NULL;
2614 		}
2615 	}
2616 
2617 	if (rdata->fmp != NULL)
2618 		m_freem(rdata->fmp);
2619 	rdata->fmp = NULL;
2620 	rdata->lmp = NULL;
2621 }
2622 
2623 static int
2624 emx_init_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
2625 {
2626 	int i, error;
2627 
2628 	/* Reset descriptor ring */
2629 	bzero(rdata->rx_desc, sizeof(emx_rxdesc_t) * rdata->num_rx_desc);
2630 
2631 	/* Allocate new ones. */
2632 	for (i = 0; i < rdata->num_rx_desc; i++) {
2633 		error = emx_newbuf(sc, rdata, i, 1);
2634 		if (error)
2635 			return (error);
2636 	}
2637 
2638 	/* Setup our descriptor pointers */
2639 	rdata->next_rx_desc_to_check = 0;
2640 
2641 	return (0);
2642 }
2643 
2644 static void
2645 emx_init_rx_unit(struct emx_softc *sc)
2646 {
2647 	struct ifnet *ifp = &sc->arpcom.ac_if;
2648 	uint64_t bus_addr;
2649 	uint32_t rctl, itr, rfctl;
2650 	int i;
2651 
2652 	/*
2653 	 * Make sure receives are disabled while setting
2654 	 * up the descriptor ring
2655 	 */
2656 	rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
2657 	E1000_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2658 
2659 	/*
2660 	 * Set the interrupt throttling rate. Value is calculated
2661 	 * as ITR = 1 / (INT_THROTTLE_CEIL * 256ns)
2662 	 */
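	/*
	 * For example, int_throttle_ceil = 10000 interrupts/s gives
	 * itr = 1000000000 / 256 / 10000 = 390 (in 256ns units).
	 */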
2663 	if (sc->int_throttle_ceil)
2664 		itr = 1000000000 / 256 / sc->int_throttle_ceil;
2665 	else
2666 		itr = 0;
2667 	emx_set_itr(sc, itr);
2668 
2669 	/* Use extended RX descriptor */
2670 	rfctl = E1000_RFCTL_EXTEN;
2671 
	/* Disable accelerated acknowledgement */
2673 	if (sc->hw.mac.type == e1000_82574)
2674 		rfctl |= E1000_RFCTL_ACK_DIS;
2675 
2676 	E1000_WRITE_REG(&sc->hw, E1000_RFCTL, rfctl);
2677 
2678 	/*
2679 	 * Receive Checksum Offload for TCP and UDP
2680 	 *
2681 	 * Checksum offloading is also enabled if multiple receive
	 * Checksum offloading is also enabled if multiple receive
	 * queues are to be supported, since we need it to figure out
	 * the packet type.
2685 	if ((ifp->if_capenable & IFCAP_RXCSUM) ||
2686 	    sc->rx_ring_cnt > 1) {
2687 		uint32_t rxcsum;
2688 
2689 		rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM);
2690 
2691 		/*
2692 		 * NOTE:
2693 		 * PCSD must be enabled to enable multiple
2694 		 * receive queues.
2695 		 */
2696 		rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2697 			  E1000_RXCSUM_PCSD;
2698 		E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum);
2699 	}
2700 
2701 	/*
2702 	 * Configure multiple receive queue (RSS)
2703 	 */
2704 	if (sc->rx_ring_cnt > 1) {
2705 		uint8_t key[EMX_NRSSRK * EMX_RSSRK_SIZE];
2706 		uint32_t reta;
2707 
2708 		KASSERT(sc->rx_ring_cnt == EMX_NRX_RING,
2709 		    ("invalid number of RX ring (%d)", sc->rx_ring_cnt));
2710 
2711 		/*
2712 		 * NOTE:
2713 		 * When we reach here, RSS has already been disabled
		 * in emx_stop(), so we can safely configure the RSS key
2715 		 * and redirect table.
2716 		 */
2717 
2718 		/*
2719 		 * Configure RSS key
2720 		 */
2721 		toeplitz_get_key(key, sizeof(key));
2722 		for (i = 0; i < EMX_NRSSRK; ++i) {
2723 			uint32_t rssrk;
2724 
2725 			rssrk = EMX_RSSRK_VAL(key, i);
2726 			EMX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk);
2727 
2728 			E1000_WRITE_REG(&sc->hw, E1000_RSSRK(i), rssrk);
2729 		}
2730 
2731 		/*
		 * Configure the RSS redirect table in the following fashion:
		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2734 		 */
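		/*
		 * For example, with 2 RX rings each 32-bit RETA register
		 * holds four byte-wide entries alternating ring 0 and
		 * ring 1 (assuming EMX_RETA_SIZE is 4).
		 */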
2735 		reta = 0;
2736 		for (i = 0; i < EMX_RETA_SIZE; ++i) {
2737 			uint32_t q;
2738 
2739 			q = (i % sc->rx_ring_cnt) << EMX_RETA_RINGIDX_SHIFT;
2740 			reta |= q << (8 * i);
2741 		}
2742 		EMX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2743 
2744 		for (i = 0; i < EMX_NRETA; ++i)
2745 			E1000_WRITE_REG(&sc->hw, E1000_RETA(i), reta);
2746 
2747 		/*
2748 		 * Enable multiple receive queues.
2749 		 * Enable IPv4 RSS standard hash functions.
2750 		 * Disable RSS interrupt.
2751 		 */
2752 		E1000_WRITE_REG(&sc->hw, E1000_MRQC,
2753 				E1000_MRQC_ENABLE_RSS_2Q |
2754 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
2755 				E1000_MRQC_RSS_FIELD_IPV4);
2756 	}
2757 
2758 	/*
2759 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573
2760 	 * long latencies are observed, like Lenovo X60. This
2761 	 * change eliminates the problem, but since having positive
2762 	 * values in RDTR is a known source of problems on other
2763 	 * platforms another solution is being sought.
2764 	 */
2765 	if (emx_82573_workaround && sc->hw.mac.type == e1000_82573) {
2766 		E1000_WRITE_REG(&sc->hw, E1000_RADV, EMX_RADV_82573);
2767 		E1000_WRITE_REG(&sc->hw, E1000_RDTR, EMX_RDTR_82573);
2768 	}
2769 
2770 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
2771 		struct emx_rxdata *rdata = &sc->rx_data[i];
2772 
2773 		/*
2774 		 * Setup the Base and Length of the Rx Descriptor Ring
2775 		 */
2776 		bus_addr = rdata->rx_desc_paddr;
2777 		E1000_WRITE_REG(&sc->hw, E1000_RDLEN(i),
2778 		    rdata->num_rx_desc * sizeof(emx_rxdesc_t));
2779 		E1000_WRITE_REG(&sc->hw, E1000_RDBAH(i),
2780 		    (uint32_t)(bus_addr >> 32));
2781 		E1000_WRITE_REG(&sc->hw, E1000_RDBAL(i),
2782 		    (uint32_t)bus_addr);
2783 
2784 		/*
2785 		 * Setup the HW Rx Head and Tail Descriptor Pointers
2786 		 */
2787 		E1000_WRITE_REG(&sc->hw, E1000_RDH(i), 0);
2788 		E1000_WRITE_REG(&sc->hw, E1000_RDT(i),
2789 		    sc->rx_data[i].num_rx_desc - 1);
2790 	}
2791 
2792 	/* Setup the Receive Control Register */
2793 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2794 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2795 		E1000_RCTL_RDMTS_HALF | E1000_RCTL_SECRC |
2796 		(sc->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2797 
2798 	/* Make sure VLAN Filters are off */
2799 	rctl &= ~E1000_RCTL_VFE;
2800 
	/* Don't store bad packets */
2802 	rctl &= ~E1000_RCTL_SBP;
2803 
2804 	/* MCLBYTES */
2805 	rctl |= E1000_RCTL_SZ_2048;
2806 
2807 	if (ifp->if_mtu > ETHERMTU)
2808 		rctl |= E1000_RCTL_LPE;
2809 	else
2810 		rctl &= ~E1000_RCTL_LPE;
2811 
2812 	/* Enable Receives */
2813 	E1000_WRITE_REG(&sc->hw, E1000_RCTL, rctl);
2814 }
2815 
2816 static void
2817 emx_destroy_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata, int ndesc)
2818 {
2819 	struct emx_rxbuf *rx_buffer;
2820 	int i;
2821 
2822 	/* Free Receive Descriptor ring */
2823 	if (rdata->rx_desc) {
2824 		bus_dmamap_unload(rdata->rx_desc_dtag, rdata->rx_desc_dmap);
2825 		bus_dmamem_free(rdata->rx_desc_dtag, rdata->rx_desc,
2826 				rdata->rx_desc_dmap);
2827 		bus_dma_tag_destroy(rdata->rx_desc_dtag);
2828 
2829 		rdata->rx_desc = NULL;
2830 	}
2831 
2832 	if (rdata->rx_buf == NULL)
2833 		return;
2834 
2835 	for (i = 0; i < ndesc; i++) {
2836 		rx_buffer = &rdata->rx_buf[i];
2837 
2838 		KKASSERT(rx_buffer->m_head == NULL);
2839 		bus_dmamap_destroy(rdata->rxtag, rx_buffer->map);
2840 	}
2841 	bus_dmamap_destroy(rdata->rxtag, rdata->rx_sparemap);
2842 	bus_dma_tag_destroy(rdata->rxtag);
2843 
2844 	kfree(rdata->rx_buf, M_DEVBUF);
2845 	rdata->rx_buf = NULL;
2846 }
2847 
2848 static void
2849 emx_rxeof(struct emx_softc *sc, int ring_idx, int count)
2850 {
2851 	struct emx_rxdata *rdata = &sc->rx_data[ring_idx];
2852 	struct ifnet *ifp = &sc->arpcom.ac_if;
2853 	uint32_t staterr;
2854 	emx_rxdesc_t *current_desc;
2855 	struct mbuf *mp;
2856 	int i;
2857 
2858 	i = rdata->next_rx_desc_to_check;
2859 	current_desc = &rdata->rx_desc[i];
2860 	staterr = le32toh(current_desc->rxd_staterr);
2861 
2862 	if (!(staterr & E1000_RXD_STAT_DD))
2863 		return;
2864 
2865 	while ((staterr & E1000_RXD_STAT_DD) && count != 0) {
2866 		struct pktinfo *pi = NULL, pi0;
2867 		struct emx_rxbuf *rx_buf = &rdata->rx_buf[i];
2868 		struct mbuf *m = NULL;
2869 		int eop, len;
2870 
2871 		logif(pkt_receive);
2872 
2873 		mp = rx_buf->m_head;
2874 
2875 		/*
2876 		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
2877 		 * needs to access the last received byte in the mbuf.
2878 		 */
2879 		bus_dmamap_sync(rdata->rxtag, rx_buf->map,
2880 				BUS_DMASYNC_POSTREAD);
2881 
2882 		len = le16toh(current_desc->rxd_length);
2883 		if (staterr & E1000_RXD_STAT_EOP) {
2884 			count--;
2885 			eop = 1;
2886 		} else {
2887 			eop = 0;
2888 		}
2889 
2890 		if (!(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
2891 			uint16_t vlan = 0;
2892 			uint32_t mrq, rss_hash;
2893 
			 * Save the necessary information before
			 * emx_newbuf() destroys it.
2896 			 * before emx_newbuf() destroy it.
2897 			 */
2898 			if ((staterr & E1000_RXD_STAT_VP) && eop)
2899 				vlan = le16toh(current_desc->rxd_vlan);
2900 
2901 			mrq = le32toh(current_desc->rxd_mrq);
2902 			rss_hash = le32toh(current_desc->rxd_rss);
2903 
2904 			EMX_RSS_DPRINTF(sc, 10,
2905 			    "ring%d, mrq 0x%08x, rss_hash 0x%08x\n",
2906 			    ring_idx, mrq, rss_hash);
2907 
2908 			if (emx_newbuf(sc, rdata, i, 0) != 0) {
2909 				ifp->if_iqdrops++;
2910 				goto discard;
2911 			}
2912 
2913 			/* Assign correct length to the current fragment */
2914 			mp->m_len = len;
2915 
2916 			if (rdata->fmp == NULL) {
2917 				mp->m_pkthdr.len = len;
2918 				rdata->fmp = mp; /* Store the first mbuf */
2919 				rdata->lmp = mp;
2920 			} else {
2921 				/*
				 * Chain mbufs together
2923 				 */
2924 				rdata->lmp->m_next = mp;
2925 				rdata->lmp = rdata->lmp->m_next;
2926 				rdata->fmp->m_pkthdr.len += len;
2927 			}
2928 
2929 			if (eop) {
2930 				rdata->fmp->m_pkthdr.rcvif = ifp;
2931 				ifp->if_ipackets++;
2932 
2933 				if (ifp->if_capenable & IFCAP_RXCSUM)
2934 					emx_rxcsum(staterr, rdata->fmp);
2935 
2936 				if (staterr & E1000_RXD_STAT_VP) {
2937 					rdata->fmp->m_pkthdr.ether_vlantag =
2938 					    vlan;
2939 					rdata->fmp->m_flags |= M_VLANTAG;
2940 				}
2941 				m = rdata->fmp;
2942 				rdata->fmp = NULL;
2943 				rdata->lmp = NULL;
2944 
2945 				if (ifp->if_capenable & IFCAP_RSS) {
2946 					pi = emx_rssinfo(m, &pi0, mrq,
2947 							 rss_hash, staterr);
2948 				}
2949 #ifdef EMX_RSS_DEBUG
2950 				rdata->rx_pkts++;
2951 #endif
2952 			}
2953 		} else {
2954 			ifp->if_ierrors++;
2955 discard:
2956 			emx_setup_rxdesc(current_desc, rx_buf);
2957 			if (rdata->fmp != NULL) {
2958 				m_freem(rdata->fmp);
2959 				rdata->fmp = NULL;
2960 				rdata->lmp = NULL;
2961 			}
2962 			m = NULL;
2963 		}
2964 
2965 		if (m != NULL)
2966 			ether_input_pkt(ifp, m, pi);
2967 
2968 		/* Advance our pointers to the next descriptor. */
2969 		if (++i == rdata->num_rx_desc)
2970 			i = 0;
2971 
2972 		current_desc = &rdata->rx_desc[i];
2973 		staterr = le32toh(current_desc->rxd_staterr);
2974 	}
2975 	rdata->next_rx_desc_to_check = i;
2976 
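	/*
	 * Keep the tail one descriptor behind the next one software
	 * will examine, so the hardware head never fully catches up
	 * with the tail.
	 */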
2977 	/* Advance the E1000's Receive Queue "Tail Pointer". */
2978 	if (--i < 0)
2979 		i = rdata->num_rx_desc - 1;
2980 	E1000_WRITE_REG(&sc->hw, E1000_RDT(ring_idx), i);
2981 }
2982 
2983 static void
2984 emx_enable_intr(struct emx_softc *sc)
2985 {
2986 	uint32_t ims_mask = IMS_ENABLE_MASK;
2987 
2988 	lwkt_serialize_handler_enable(&sc->main_serialize);
2989 
2990 #if 0
2991 	if (sc->hw.mac.type == e1000_82574) {
2992 		E1000_WRITE_REG(hw, EMX_EIAC, EM_MSIX_MASK);
2993 		ims_mask |= EM_MSIX_MASK;
2994 	}
2995 #endif
2996 	E1000_WRITE_REG(&sc->hw, E1000_IMS, ims_mask);
2997 }
2998 
2999 static void
3000 emx_disable_intr(struct emx_softc *sc)
3001 {
3002 	if (sc->hw.mac.type == e1000_82574)
3003 		E1000_WRITE_REG(&sc->hw, EMX_EIAC, 0);
3004 	E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff);
3005 
3006 	lwkt_serialize_handler_disable(&sc->main_serialize);
3007 }
3008 
3009 /*
 * Bit of a misnomer: what this really means is to enable OS
 * management of the system, i.e. to disable the special hardware
 * management features.
3013  */
3014 static void
3015 emx_get_mgmt(struct emx_softc *sc)
3016 {
3017 	/* A shared code workaround */
3018 	if (sc->flags & EMX_FLAG_HAS_MGMT) {
3019 		int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H);
3020 		int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
3021 
3022 		/* disable hardware interception of ARP */
3023 		manc &= ~(E1000_MANC_ARP_EN);
3024 
		/* enable receiving management packets to the host */
3026 		manc |= E1000_MANC_EN_MNG2HOST;
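		/* 623 and 664 are the standard RMCP/ASF management ports */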
3027 #define E1000_MNG2HOST_PORT_623 (1 << 5)
3028 #define E1000_MNG2HOST_PORT_664 (1 << 6)
3029 		manc2h |= E1000_MNG2HOST_PORT_623;
3030 		manc2h |= E1000_MNG2HOST_PORT_664;
3031 		E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h);
3032 
3033 		E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
3034 	}
3035 }
3036 
3037 /*
3038  * Give control back to hardware management
3039  * controller if there is one.
3040  */
3041 static void
3042 emx_rel_mgmt(struct emx_softc *sc)
3043 {
3044 	if (sc->flags & EMX_FLAG_HAS_MGMT) {
3045 		int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
3046 
3047 		/* re-enable hardware interception of ARP */
3048 		manc |= E1000_MANC_ARP_EN;
3049 		manc &= ~E1000_MANC_EN_MNG2HOST;
3050 
3051 		E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
3052 	}
3053 }
3054 
3055 /*
3056  * emx_get_hw_control() sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
3057  * For ASF and Pass Through versions of f/w this means that
3058  * the driver is loaded.  For AMT version (only with 82573)
3059  * of the f/w this means that the network i/f is open.
3060  */
3061 static void
3062 emx_get_hw_control(struct emx_softc *sc)
3063 {
3064 	/* Let firmware know the driver has taken over */
3065 	if (sc->hw.mac.type == e1000_82573) {
3066 		uint32_t swsm;
3067 
3068 		swsm = E1000_READ_REG(&sc->hw, E1000_SWSM);
3069 		E1000_WRITE_REG(&sc->hw, E1000_SWSM,
3070 		    swsm | E1000_SWSM_DRV_LOAD);
3071 	} else {
3072 		uint32_t ctrl_ext;
3073 
3074 		ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
3075 		E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
3076 		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
3077 	}
3078 	sc->flags |= EMX_FLAG_HW_CTRL;
3079 }
3080 
3081 /*
3082  * emx_rel_hw_control() resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
3083  * For ASF and Pass Through versions of f/w this means that the
3084  * driver is no longer loaded.  For AMT version (only with 82573)
3085  * of the f/w this means that the network i/f is closed.
3086  */
3087 static void
3088 emx_rel_hw_control(struct emx_softc *sc)
3089 {
3090 	if ((sc->flags & EMX_FLAG_HW_CTRL) == 0)
3091 		return;
3092 	sc->flags &= ~EMX_FLAG_HW_CTRL;
3093 
	/* Let firmware take over control of h/w */
3095 	if (sc->hw.mac.type == e1000_82573) {
3096 		uint32_t swsm;
3097 
3098 		swsm = E1000_READ_REG(&sc->hw, E1000_SWSM);
3099 		E1000_WRITE_REG(&sc->hw, E1000_SWSM,
3100 		    swsm & ~E1000_SWSM_DRV_LOAD);
3101 	} else {
3102 		uint32_t ctrl_ext;
3103 
3104 		ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
3105 		E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
3106 		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
3107 	}
3108 }
3109 
3110 static int
3111 emx_is_valid_eaddr(const uint8_t *addr)
3112 {
3113 	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
3114 
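	/* Reject group (multicast/broadcast) and all-zero addresses. */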
3115 	if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN))
3116 		return (FALSE);
3117 
3118 	return (TRUE);
3119 }
3120 
3121 /*
3122  * Enable PCI Wake On Lan capability
3123  */
3124 void
3125 emx_enable_wol(device_t dev)
3126 {
3127 	uint16_t cap, status;
3128 	uint8_t id;
3129 
	/* First find the capabilities pointer */
3131 	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
3132 
3133 	/* Read the PM Capabilities */
3134 	id = pci_read_config(dev, cap, 1);
3135 	if (id != PCIY_PMG)     /* Something wrong */
3136 		return;
3137 
3138 	/*
3139 	 * OK, we have the power capabilities,
3140 	 * so now get the status register
3141 	 */
3142 	cap += PCIR_POWER_STATUS;
3143 	status = pci_read_config(dev, cap, 2);
3144 	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3145 	pci_write_config(dev, cap, status, 2);
3146 }
3147 
3148 static void
3149 emx_update_stats(struct emx_softc *sc)
3150 {
3151 	struct ifnet *ifp = &sc->arpcom.ac_if;
3152 
3153 	if (sc->hw.phy.media_type == e1000_media_type_copper ||
3154 	    (E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_LU)) {
3155 		sc->stats.symerrs += E1000_READ_REG(&sc->hw, E1000_SYMERRS);
3156 		sc->stats.sec += E1000_READ_REG(&sc->hw, E1000_SEC);
3157 	}
3158 	sc->stats.crcerrs += E1000_READ_REG(&sc->hw, E1000_CRCERRS);
3159 	sc->stats.mpc += E1000_READ_REG(&sc->hw, E1000_MPC);
3160 	sc->stats.scc += E1000_READ_REG(&sc->hw, E1000_SCC);
3161 	sc->stats.ecol += E1000_READ_REG(&sc->hw, E1000_ECOL);
3162 
3163 	sc->stats.mcc += E1000_READ_REG(&sc->hw, E1000_MCC);
3164 	sc->stats.latecol += E1000_READ_REG(&sc->hw, E1000_LATECOL);
3165 	sc->stats.colc += E1000_READ_REG(&sc->hw, E1000_COLC);
3166 	sc->stats.dc += E1000_READ_REG(&sc->hw, E1000_DC);
3167 	sc->stats.rlec += E1000_READ_REG(&sc->hw, E1000_RLEC);
3168 	sc->stats.xonrxc += E1000_READ_REG(&sc->hw, E1000_XONRXC);
3169 	sc->stats.xontxc += E1000_READ_REG(&sc->hw, E1000_XONTXC);
3170 	sc->stats.xoffrxc += E1000_READ_REG(&sc->hw, E1000_XOFFRXC);
3171 	sc->stats.xofftxc += E1000_READ_REG(&sc->hw, E1000_XOFFTXC);
3172 	sc->stats.fcruc += E1000_READ_REG(&sc->hw, E1000_FCRUC);
3173 	sc->stats.prc64 += E1000_READ_REG(&sc->hw, E1000_PRC64);
3174 	sc->stats.prc127 += E1000_READ_REG(&sc->hw, E1000_PRC127);
3175 	sc->stats.prc255 += E1000_READ_REG(&sc->hw, E1000_PRC255);
3176 	sc->stats.prc511 += E1000_READ_REG(&sc->hw, E1000_PRC511);
3177 	sc->stats.prc1023 += E1000_READ_REG(&sc->hw, E1000_PRC1023);
3178 	sc->stats.prc1522 += E1000_READ_REG(&sc->hw, E1000_PRC1522);
3179 	sc->stats.gprc += E1000_READ_REG(&sc->hw, E1000_GPRC);
3180 	sc->stats.bprc += E1000_READ_REG(&sc->hw, E1000_BPRC);
3181 	sc->stats.mprc += E1000_READ_REG(&sc->hw, E1000_MPRC);
3182 	sc->stats.gptc += E1000_READ_REG(&sc->hw, E1000_GPTC);
3183 
	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
3186 
3187 	sc->stats.gorc += E1000_READ_REG(&sc->hw, E1000_GORCH);
3188 	sc->stats.gotc += E1000_READ_REG(&sc->hw, E1000_GOTCH);
3189 
3190 	sc->stats.rnbc += E1000_READ_REG(&sc->hw, E1000_RNBC);
3191 	sc->stats.ruc += E1000_READ_REG(&sc->hw, E1000_RUC);
3192 	sc->stats.rfc += E1000_READ_REG(&sc->hw, E1000_RFC);
3193 	sc->stats.roc += E1000_READ_REG(&sc->hw, E1000_ROC);
3194 	sc->stats.rjc += E1000_READ_REG(&sc->hw, E1000_RJC);
3195 
3196 	sc->stats.tor += E1000_READ_REG(&sc->hw, E1000_TORH);
3197 	sc->stats.tot += E1000_READ_REG(&sc->hw, E1000_TOTH);
3198 
3199 	sc->stats.tpr += E1000_READ_REG(&sc->hw, E1000_TPR);
3200 	sc->stats.tpt += E1000_READ_REG(&sc->hw, E1000_TPT);
3201 	sc->stats.ptc64 += E1000_READ_REG(&sc->hw, E1000_PTC64);
3202 	sc->stats.ptc127 += E1000_READ_REG(&sc->hw, E1000_PTC127);
3203 	sc->stats.ptc255 += E1000_READ_REG(&sc->hw, E1000_PTC255);
3204 	sc->stats.ptc511 += E1000_READ_REG(&sc->hw, E1000_PTC511);
3205 	sc->stats.ptc1023 += E1000_READ_REG(&sc->hw, E1000_PTC1023);
3206 	sc->stats.ptc1522 += E1000_READ_REG(&sc->hw, E1000_PTC1522);
3207 	sc->stats.mptc += E1000_READ_REG(&sc->hw, E1000_MPTC);
3208 	sc->stats.bptc += E1000_READ_REG(&sc->hw, E1000_BPTC);
3209 
3210 	sc->stats.algnerrc += E1000_READ_REG(&sc->hw, E1000_ALGNERRC);
3211 	sc->stats.rxerrc += E1000_READ_REG(&sc->hw, E1000_RXERRC);
3212 	sc->stats.tncrs += E1000_READ_REG(&sc->hw, E1000_TNCRS);
3213 	sc->stats.cexterr += E1000_READ_REG(&sc->hw, E1000_CEXTERR);
3214 	sc->stats.tsctc += E1000_READ_REG(&sc->hw, E1000_TSCTC);
3215 	sc->stats.tsctfc += E1000_READ_REG(&sc->hw, E1000_TSCTFC);
3216 
3217 	ifp->if_collisions = sc->stats.colc;
3218 
3219 	/* Rx Errors */
3220 	ifp->if_ierrors = sc->stats.rxerrc +
3221 			  sc->stats.crcerrs + sc->stats.algnerrc +
3222 			  sc->stats.ruc + sc->stats.roc +
3223 			  sc->stats.mpc + sc->stats.cexterr;
3224 
3225 	/* Tx Errors */
3226 	ifp->if_oerrors = sc->stats.ecol + sc->stats.latecol;
3227 }
3228 
3229 static void
3230 emx_print_debug_info(struct emx_softc *sc)
3231 {
3232 	device_t dev = sc->dev;
3233 	uint8_t *hw_addr = sc->hw.hw_addr;
3234 
3235 	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3236 	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
3237 	    E1000_READ_REG(&sc->hw, E1000_CTRL),
3238 	    E1000_READ_REG(&sc->hw, E1000_RCTL));
3239 	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&sc->hw, E1000_PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&sc->hw, E1000_PBA) & 0xffff));
3242 	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3243 	    sc->hw.fc.high_water, sc->hw.fc.low_water);
3244 	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3245 	    E1000_READ_REG(&sc->hw, E1000_TIDV),
3246 	    E1000_READ_REG(&sc->hw, E1000_TADV));
3247 	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3248 	    E1000_READ_REG(&sc->hw, E1000_RDTR),
3249 	    E1000_READ_REG(&sc->hw, E1000_RADV));
3250 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3251 	    E1000_READ_REG(&sc->hw, E1000_TDH(0)),
3252 	    E1000_READ_REG(&sc->hw, E1000_TDT(0)));
3253 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
3254 	    E1000_READ_REG(&sc->hw, E1000_RDH(0)),
3255 	    E1000_READ_REG(&sc->hw, E1000_RDT(0)));
3256 	device_printf(dev, "Num Tx descriptors avail = %d\n",
3257 	    sc->num_tx_desc_avail);
3258 
3259 	device_printf(dev, "TSO segments %lu\n", sc->tso_segments);
3260 	device_printf(dev, "TSO ctx reused %lu\n", sc->tso_ctx_reused);
3261 }
3262 
3263 static void
3264 emx_print_hw_stats(struct emx_softc *sc)
3265 {
3266 	device_t dev = sc->dev;
3267 
3268 	device_printf(dev, "Excessive collisions = %lld\n",
3269 	    (long long)sc->stats.ecol);
#if (DEBUG_HW > 0)  /* Don't output these errors normally */
3271 	device_printf(dev, "Symbol errors = %lld\n",
3272 	    (long long)sc->stats.symerrs);
3273 #endif
3274 	device_printf(dev, "Sequence errors = %lld\n",
3275 	    (long long)sc->stats.sec);
3276 	device_printf(dev, "Defer count = %lld\n",
3277 	    (long long)sc->stats.dc);
3278 	device_printf(dev, "Missed Packets = %lld\n",
3279 	    (long long)sc->stats.mpc);
3280 	device_printf(dev, "Receive No Buffers = %lld\n",
3281 	    (long long)sc->stats.rnbc);
	/* RLEC is inaccurate on some hardware, so calculate our own. */
3283 	device_printf(dev, "Receive Length Errors = %lld\n",
3284 	    ((long long)sc->stats.roc + (long long)sc->stats.ruc));
3285 	device_printf(dev, "Receive errors = %lld\n",
3286 	    (long long)sc->stats.rxerrc);
3287 	device_printf(dev, "Crc errors = %lld\n",
3288 	    (long long)sc->stats.crcerrs);
3289 	device_printf(dev, "Alignment errors = %lld\n",
3290 	    (long long)sc->stats.algnerrc);
3291 	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
3292 	    (long long)sc->stats.cexterr);
3293 	device_printf(dev, "RX overruns = %ld\n", sc->rx_overruns);
3294 	device_printf(dev, "XON Rcvd = %lld\n",
3295 	    (long long)sc->stats.xonrxc);
3296 	device_printf(dev, "XON Xmtd = %lld\n",
3297 	    (long long)sc->stats.xontxc);
3298 	device_printf(dev, "XOFF Rcvd = %lld\n",
3299 	    (long long)sc->stats.xoffrxc);
3300 	device_printf(dev, "XOFF Xmtd = %lld\n",
3301 	    (long long)sc->stats.xofftxc);
3302 	device_printf(dev, "Good Packets Rcvd = %lld\n",
3303 	    (long long)sc->stats.gprc);
3304 	device_printf(dev, "Good Packets Xmtd = %lld\n",
3305 	    (long long)sc->stats.gptc);
3306 }
3307 
3308 static void
3309 emx_print_nvm_info(struct emx_softc *sc)
3310 {
3311 	uint16_t eeprom_data;
3312 	int i, j, row = 0;
3313 
	/* It's a bit crude, but it gets the job done */
3315 	kprintf("\nInterface EEPROM Dump:\n");
3316 	kprintf("Offset\n0x0000  ");
3317 	for (i = 0, j = 0; i < 32; i++, j++) {
3318 		if (j == 8) { /* Make the offset block */
			j = 0;
			++row;
			kprintf("\n0x00%x0  ", row);
3321 		}
3322 		e1000_read_nvm(&sc->hw, i, 1, &eeprom_data);
3323 		kprintf("%04x ", eeprom_data);
3324 	}
3325 	kprintf("\n");
3326 }
3327 
3328 static int
3329 emx_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3330 {
3331 	struct emx_softc *sc;
3332 	struct ifnet *ifp;
3333 	int error, result;
3334 
3335 	result = -1;
3336 	error = sysctl_handle_int(oidp, &result, 0, req);
3337 	if (error || !req->newptr)
3338 		return (error);
3339 
3340 	sc = (struct emx_softc *)arg1;
3341 	ifp = &sc->arpcom.ac_if;
3342 
3343 	ifnet_serialize_all(ifp);
3344 
3345 	if (result == 1)
3346 		emx_print_debug_info(sc);
3347 
3348 	/*
3349 	 * This value will cause a hex dump of the
3350 	 * first 32 16-bit words of the EEPROM to
3351 	 * the screen.
3352 	 */
3353 	if (result == 2)
3354 		emx_print_nvm_info(sc);
3355 
3356 	ifnet_deserialize_all(ifp);
3357 
3358 	return (error);
3359 }
3360 
3361 static int
3362 emx_sysctl_stats(SYSCTL_HANDLER_ARGS)
3363 {
3364 	int error, result;
3365 
3366 	result = -1;
3367 	error = sysctl_handle_int(oidp, &result, 0, req);
3368 	if (error || !req->newptr)
3369 		return (error);
3370 
3371 	if (result == 1) {
3372 		struct emx_softc *sc = (struct emx_softc *)arg1;
3373 		struct ifnet *ifp = &sc->arpcom.ac_if;
3374 
3375 		ifnet_serialize_all(ifp);
3376 		emx_print_hw_stats(sc);
3377 		ifnet_deserialize_all(ifp);
3378 	}
3379 	return (error);
3380 }
3381 
3382 static void
3383 emx_add_sysctl(struct emx_softc *sc)
3384 {
3385 #ifdef EMX_RSS_DEBUG
3386 	char rx_pkt[32];
3387 	int i;
3388 #endif
3389 
3390 	sysctl_ctx_init(&sc->sysctl_ctx);
3391 	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
3392 				SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
3393 				device_get_nameunit(sc->dev),
3394 				CTLFLAG_RD, 0, "");
3395 	if (sc->sysctl_tree == NULL) {
3396 		device_printf(sc->dev, "can't add sysctl node\n");
3397 		return;
3398 	}
3399 
3400 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3401 			OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, sc, 0,
3402 			emx_sysctl_debug_info, "I", "Debug Information");
3403 
3404 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3405 			OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, sc, 0,
3406 			emx_sysctl_stats, "I", "Statistics");
3407 
3408 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3409 		       OID_AUTO, "rxd", CTLFLAG_RD,
3410 		       &sc->rx_data[0].num_rx_desc, 0, NULL);
3411 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3412 		       OID_AUTO, "txd", CTLFLAG_RD, &sc->num_tx_desc, 0, NULL);
3413 
3414 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3415 			OID_AUTO, "int_throttle_ceil", CTLTYPE_INT|CTLFLAG_RW,
3416 			sc, 0, emx_sysctl_int_throttle, "I",
3417 			"interrupt throttling rate");
3418 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3419 			OID_AUTO, "int_tx_nsegs", CTLTYPE_INT|CTLFLAG_RW,
3420 			sc, 0, emx_sysctl_int_tx_nsegs, "I",
3421 			"# segments per TX interrupt");
3422 
3423 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3424 		       OID_AUTO, "rx_ring_cnt", CTLFLAG_RD,
3425 		       &sc->rx_ring_cnt, 0, "RX ring count");
3426 
3427 #ifdef IFPOLL_ENABLE
3428 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3429 			OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
3430 			sc, 0, emx_sysctl_npoll_rxoff, "I",
3431 			"NPOLLING RX cpu offset");
3432 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3433 			OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
3434 			sc, 0, emx_sysctl_npoll_txoff, "I",
3435 			"NPOLLING TX cpu offset");
3436 #endif
3437 
3438 #ifdef EMX_RSS_DEBUG
3439 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3440 		       OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug,
3441 		       0, "RSS debug level");
3442 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3443 		ksnprintf(rx_pkt, sizeof(rx_pkt), "rx%d_pkt", i);
3444 		SYSCTL_ADD_UINT(&sc->sysctl_ctx,
3445 				SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO,
3446 				rx_pkt, CTLFLAG_RW,
3447 				&sc->rx_data[i].rx_pkts, 0, "RXed packets");
3448 	}
3449 #endif
3450 }
3451 
3452 static int
3453 emx_sysctl_int_throttle(SYSCTL_HANDLER_ARGS)
3454 {
3455 	struct emx_softc *sc = (void *)arg1;
3456 	struct ifnet *ifp = &sc->arpcom.ac_if;
3457 	int error, throttle;
3458 
3459 	throttle = sc->int_throttle_ceil;
3460 	error = sysctl_handle_int(oidp, &throttle, 0, req);
3461 	if (error || req->newptr == NULL)
3462 		return error;
3463 	if (throttle < 0 || throttle > 1000000000 / 256)
3464 		return EINVAL;
3465 
3466 	if (throttle) {
3467 		/*
		 * Convert the requested rate into 256ns increments, then
		 * recompute the sysctl value so that the exact programmed
		 * frequency is reported back.
3470 		 */
3471 		throttle = 1000000000 / 256 / throttle;
3472 
		/* Upper 16 bits of ITR are reserved and should be zero */
3474 		if (throttle & 0xffff0000)
3475 			return EINVAL;
3476 	}
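	/*
	 * For example, a request of 6000 interrupts/s becomes
	 * 3906250 / 6000 = 651 units of 256ns, stored back as
	 * 3906250 / 651 = 6000 interrupts/s.
	 */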
3477 
3478 	ifnet_serialize_all(ifp);
3479 
3480 	if (throttle)
3481 		sc->int_throttle_ceil = 1000000000 / 256 / throttle;
3482 	else
3483 		sc->int_throttle_ceil = 0;
3484 
3485 	if (ifp->if_flags & IFF_RUNNING)
3486 		emx_set_itr(sc, throttle);
3487 
3488 	ifnet_deserialize_all(ifp);
3489 
3490 	if (bootverbose) {
3491 		if_printf(ifp, "Interrupt moderation set to %d/sec\n",
3492 			  sc->int_throttle_ceil);
3493 	}
3494 	return 0;
3495 }
3496 
3497 static int
3498 emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS)
3499 {
3500 	struct emx_softc *sc = (void *)arg1;
3501 	struct ifnet *ifp = &sc->arpcom.ac_if;
3502 	int error, segs;
3503 
3504 	segs = sc->tx_int_nsegs;
3505 	error = sysctl_handle_int(oidp, &segs, 0, req);
3506 	if (error || req->newptr == NULL)
3507 		return error;
3508 	if (segs <= 0)
3509 		return EINVAL;
3510 
3511 	ifnet_serialize_all(ifp);
3512 
3513 	/*
3514 	 * Don't allow int_tx_nsegs to become:
	 * o  Less than oact_tx_desc
	 * o  So large that no TX descriptor would cause a TX interrupt
	 *    to be generated (OACTIVE would never recover)
	 * o  So small that it would cause tx_dd[] to overflow
3519 	 */
3520 	if (segs < sc->oact_tx_desc ||
3521 	    segs >= sc->num_tx_desc - sc->oact_tx_desc ||
3522 	    segs < sc->num_tx_desc / EMX_TXDD_SAFE) {
3523 		error = EINVAL;
3524 	} else {
3525 		error = 0;
3526 		sc->tx_int_nsegs = segs;
3527 	}
3528 
3529 	ifnet_deserialize_all(ifp);
3530 
3531 	return error;
3532 }
3533 
3534 #ifdef IFPOLL_ENABLE
3535 
3536 static int
3537 emx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
3538 {
3539 	struct emx_softc *sc = (void *)arg1;
3540 	struct ifnet *ifp = &sc->arpcom.ac_if;
3541 	int error, off;
3542 
3543 	off = sc->rx_npoll_off;
3544 	error = sysctl_handle_int(oidp, &off, 0, req);
3545 	if (error || req->newptr == NULL)
3546 		return error;
3547 	if (off < 0)
3548 		return EINVAL;
3549 
3550 	ifnet_serialize_all(ifp);
3551 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
3552 		error = EINVAL;
3553 	} else {
3554 		error = 0;
3555 		sc->rx_npoll_off = off;
3556 	}
3557 	ifnet_deserialize_all(ifp);
3558 
3559 	return error;
3560 }
3561 
3562 static int
3563 emx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
3564 {
3565 	struct emx_softc *sc = (void *)arg1;
3566 	struct ifnet *ifp = &sc->arpcom.ac_if;
3567 	int error, off;
3568 
3569 	off = sc->tx_npoll_off;
3570 	error = sysctl_handle_int(oidp, &off, 0, req);
3571 	if (error || req->newptr == NULL)
3572 		return error;
3573 	if (off < 0)
3574 		return EINVAL;
3575 
3576 	ifnet_serialize_all(ifp);
3577 	if (off >= ncpus2) {
3578 		error = EINVAL;
3579 	} else {
3580 		error = 0;
3581 		sc->tx_npoll_off = off;
3582 	}
3583 	ifnet_deserialize_all(ifp);
3584 
3585 	return error;
3586 }
3587 
3588 #endif	/* IFPOLL_ENABLE */
3589 
3590 static int
3591 emx_dma_alloc(struct emx_softc *sc)
3592 {
3593 	int error, i;
3594 
3595 	/*
3596 	 * Create top level busdma tag
3597 	 */
3598 	error = bus_dma_tag_create(NULL, 1, 0,
3599 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3600 			NULL, NULL,
3601 			BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
3602 			0, &sc->parent_dtag);
3603 	if (error) {
3604 		device_printf(sc->dev, "could not create top level DMA tag\n");
3605 		return error;
3606 	}
3607 
3608 	/*
3609 	 * Allocate transmit descriptors ring and buffers
3610 	 */
3611 	error = emx_create_tx_ring(sc);
3612 	if (error) {
3613 		device_printf(sc->dev, "Could not setup transmit structures\n");
3614 		return error;
3615 	}
3616 
3617 	/*
3618 	 * Allocate receive descriptors ring and buffers
3619 	 */
3620 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3621 		error = emx_create_rx_ring(sc, &sc->rx_data[i]);
3622 		if (error) {
3623 			device_printf(sc->dev,
3624 			    "Could not setup receive structures\n");
3625 			return error;
3626 		}
3627 	}
3628 	return 0;
3629 }
3630 
3631 static void
3632 emx_dma_free(struct emx_softc *sc)
3633 {
3634 	int i;
3635 
3636 	emx_destroy_tx_ring(sc, sc->num_tx_desc);
3637 
3638 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3639 		emx_destroy_rx_ring(sc, &sc->rx_data[i],
3640 				    sc->rx_data[i].num_rx_desc);
3641 	}
3642 
3643 	/* Free top level busdma tag */
3644 	if (sc->parent_dtag != NULL)
3645 		bus_dma_tag_destroy(sc->parent_dtag);
3646 }
3647 
3648 static void
3649 emx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3650 {
3651 	struct emx_softc *sc = ifp->if_softc;
3652 
3653 	ifnet_serialize_array_enter(sc->serializes, EMX_NSERIALIZE,
3654 	    EMX_TX_SERIALIZE, EMX_RX_SERIALIZE, slz);
3655 }
3656 
3657 static void
3658 emx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3659 {
3660 	struct emx_softc *sc = ifp->if_softc;
3661 
3662 	ifnet_serialize_array_exit(sc->serializes, EMX_NSERIALIZE,
3663 	    EMX_TX_SERIALIZE, EMX_RX_SERIALIZE, slz);
3664 }
3665 
3666 static int
3667 emx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3668 {
3669 	struct emx_softc *sc = ifp->if_softc;
3670 
3671 	return ifnet_serialize_array_try(sc->serializes, EMX_NSERIALIZE,
3672 	    EMX_TX_SERIALIZE, EMX_RX_SERIALIZE, slz);
3673 }
3674 
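/*
 * Enter/exit all serializers except the first (main) one; useful for
 * paths that already hold the main serializer.
 */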
3675 static void
3676 emx_serialize_skipmain(struct emx_softc *sc)
3677 {
3678 	lwkt_serialize_array_enter(sc->serializes, EMX_NSERIALIZE, 1);
3679 }
3680 
3681 static void
3682 emx_deserialize_skipmain(struct emx_softc *sc)
3683 {
3684 	lwkt_serialize_array_exit(sc->serializes, EMX_NSERIALIZE, 1);
3685 }
3686 
3687 #ifdef INVARIANTS
3688 
3689 static void
3690 emx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3691     boolean_t serialized)
3692 {
3693 	struct emx_softc *sc = ifp->if_softc;
3694 
3695 	ifnet_serialize_array_assert(sc->serializes, EMX_NSERIALIZE,
3696 	    EMX_TX_SERIALIZE, EMX_RX_SERIALIZE, slz, serialized);
3697 }
3698 
3699 #endif	/* INVARIANTS */
3700 
3701 #ifdef IFPOLL_ENABLE
3702 
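/*
 * Polling status handler: sample the read-to-clear ICR and, on a
 * receive sequence error or link status change, refresh the link
 * state while the timer callout is paused.
 */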
3703 static void
3704 emx_npoll_status(struct ifnet *ifp)
3705 {
3706 	struct emx_softc *sc = ifp->if_softc;
3707 	uint32_t reg_icr;
3708 
3709 	ASSERT_SERIALIZED(&sc->main_serialize);
3710 
3711 	reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
3712 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
3713 		callout_stop(&sc->timer);
3714 		sc->hw.mac.get_link_status = 1;
3715 		emx_update_link_status(sc);
3716 		callout_reset(&sc->timer, hz, emx_timer, sc);
3717 	}
3718 }
3719 
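/*
 * Polling TX handler: reclaim completed TX descriptors and restart
 * transmission if the send queue is not empty.
 */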
3720 static void
3721 emx_npoll_tx(struct ifnet *ifp, void *arg __unused, int cycle __unused)
3722 {
3723 	struct emx_softc *sc = ifp->if_softc;
3724 
3725 	ASSERT_SERIALIZED(&sc->tx_serialize);
3726 
3727 	emx_txeof(sc);
3728 	if (!ifq_is_empty(&ifp->if_snd))
3729 		if_devstart(ifp);
3730 }
3731 
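/*
 * Polling RX handler; the ring index is recovered from rdata's offset
 * within sc->rx_data[] and `cycle' bounds the number of packets
 * processed in this call.
 */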
3732 static void
3733 emx_npoll_rx(struct ifnet *ifp, void *arg, int cycle)
3734 {
3735 	struct emx_softc *sc = ifp->if_softc;
3736 	struct emx_rxdata *rdata = arg;
3737 
3738 	ASSERT_SERIALIZED(&rdata->rx_serialize);
3739 
3740 	emx_rxeof(sc, rdata - sc->rx_data, cycle);
3741 }
3742 
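/*
 * Register (info != NULL) or deregister (info == NULL) the polling
 * handlers.  The TX handler and the per-ring RX handlers are placed
 * on CPUs according to the configured polling offsets; the hardware
 * interrupt is disabled while polling is in effect.
 */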
3743 static void
3744 emx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
3745 {
3746 	struct emx_softc *sc = ifp->if_softc;
3747 
3748 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3749 
3750 	if (info) {
3751 		int i, off;
3752 
3753 		info->ifpi_status.status_func = emx_npoll_status;
3754 		info->ifpi_status.serializer = &sc->main_serialize;
3755 
3756 		off = sc->tx_npoll_off;
3757 		KKASSERT(off < ncpus2);
3758 		info->ifpi_tx[off].poll_func = emx_npoll_tx;
3759 		info->ifpi_tx[off].arg = NULL;
3760 		info->ifpi_tx[off].serializer = &sc->tx_serialize;
3761 
3762 		off = sc->rx_npoll_off;
3763 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3764 			struct emx_rxdata *rdata = &sc->rx_data[i];
3765 			int idx = i + off;
3766 
3767 			KKASSERT(idx < ncpus2);
3768 			info->ifpi_rx[idx].poll_func = emx_npoll_rx;
3769 			info->ifpi_rx[idx].arg = rdata;
3770 			info->ifpi_rx[idx].serializer = &rdata->rx_serialize;
3771 		}
3772 
3773 		if (ifp->if_flags & IFF_RUNNING)
3774 			emx_disable_intr(sc);
3775 		ifp->if_npoll_cpuid = sc->tx_npoll_off;
3776 	} else {
3777 		if (ifp->if_flags & IFF_RUNNING)
3778 			emx_enable_intr(sc);
3779 		ifp->if_npoll_cpuid = -1;
3780 	}
3781 }
3782 
3783 #endif	/* IFPOLL_ENABLE */
3784 
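/*
 * Program the interrupt throttling rate.  The 82574 additionally has
 * per-vector EITR registers which must be written when MSI-X is used.
 */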
3785 static void
3786 emx_set_itr(struct emx_softc *sc, uint32_t itr)
3787 {
3788 	E1000_WRITE_REG(&sc->hw, E1000_ITR, itr);
3789 	if (sc->hw.mac.type == e1000_82574) {
3790 		int i;
3791 
3792 		/*
3793 		 * When using MSI-X interrupts we need to
3794 		 * throttle using the EITR registers
3795 		 */
3796 		for (i = 0; i < 4; ++i)
3797 			E1000_WRITE_REG(&sc->hw, E1000_EITR_82574(i), itr);
3798 	}
3799 }
3800 
3801 /*
3802  * Disable PCIe ASPM L0s, and L1 where the chip errata require it,
3803  * e.g. 82574L errata #20
3803  */
3804 static void
3805 emx_disable_aspm(struct emx_softc *sc)
3806 {
3807 	uint16_t link_cap, link_ctrl, disable;
3808 	uint8_t pcie_ptr, reg;
3809 	device_t dev = sc->dev;
3810 
3811 	switch (sc->hw.mac.type) {
3812 	case e1000_82571:
3813 	case e1000_82572:
3814 	case e1000_82573:
3815 		/*
3816 		 * 82573 specification update
3817 		 * errata #8 disable L0s
3818 		 * errata #41 disable L1
3819 		 *
3820 		 * 82571/82572 specification update
3821 		 * errata #13 disable L1
3822 		 * errata #68 disable L0s
3823 		 */
3824 		disable = PCIEM_LNKCTL_ASPM_L0S | PCIEM_LNKCTL_ASPM_L1;
3825 		break;
3826 
3827 	case e1000_82574:
3828 		/*
3829 		 * 82574 specification update errata #20
3830 		 *
3831 		 * There is no need to disable L1
3832 		 */
3833 		disable = PCIEM_LNKCTL_ASPM_L0S;
3834 		break;
3835 
3836 	default:
3837 		return;
3838 	}
3839 
3840 	pcie_ptr = pci_get_pciecap_ptr(dev);
3841 	if (pcie_ptr == 0)
3842 		return;
3843 
3844 	link_cap = pci_read_config(dev, pcie_ptr + PCIER_LINKCAP, 2);
3845 	if ((link_cap & PCIEM_LNKCAP_ASPM_MASK) == 0)
3846 		return;
3847 
3848 	if (bootverbose)
3849 		if_printf(&sc->arpcom.ac_if, "disable ASPM %#04x\n", disable);
3850 
3851 	reg = pcie_ptr + PCIER_LINKCTRL;
3852 	link_ctrl = pci_read_config(dev, reg, 2);
3853 	link_ctrl &= ~disable;
3854 	pci_write_config(dev, reg, link_ctrl, 2);
3855 }
3856 
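/*
 * Ensure that the full Ethernet/IP/TCP header (plus four extra bytes
 * when EMX_FLAG_TSO_PULLEX is set) lies in the first mbuf, pulling up
 * the chain if necessary, and zero ip_len as required for hardware
 * TSO, which fills in the length of each segment.
 */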
3857 static int
3858 emx_tso_pullup(struct emx_softc *sc, struct mbuf **mp)
3859 {
3860 	int iphlen, hoff, thoff, ex = 0;
3861 	struct mbuf *m;
3862 	struct ip *ip;
3863 
3864 	m = *mp;
3865 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
3866 
3867 	iphlen = m->m_pkthdr.csum_iphlen;
3868 	thoff = m->m_pkthdr.csum_thlen;
3869 	hoff = m->m_pkthdr.csum_lhlen;
3870 
3871 	KASSERT(iphlen > 0, ("invalid ip hlen"));
3872 	KASSERT(thoff > 0, ("invalid tcp hlen"));
3873 	KASSERT(hoff > 0, ("invalid ether hlen"));
3874 
3875 	if (sc->flags & EMX_FLAG_TSO_PULLEX)
3876 		ex = 4;
3877 
3878 	if (m->m_len < hoff + iphlen + thoff + ex) {
3879 		m = m_pullup(m, hoff + iphlen + thoff + ex);
3880 		if (m == NULL) {
3881 			*mp = NULL;
3882 			return ENOBUFS;
3883 		}
3884 		*mp = m;
3885 	}
3886 	ip = mtodoff(m, struct ip *, hoff);
3887 	ip->ip_len = 0;
3888 
3889 	return 0;
3890 }
3891 
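/*
 * Set up, or reuse, the TSO context descriptor.  When the header
 * layout, MSS and packet length all match the previously programmed
 * context, the cached txd_upper/txd_lower values are returned and no
 * descriptor is consumed (return 0); otherwise a new context
 * descriptor is filled in and one TX descriptor is used (return 1).
 */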
3892 static int
3893 emx_tso_setup(struct emx_softc *sc, struct mbuf *mp,
3894     uint32_t *txd_upper, uint32_t *txd_lower)
3895 {
3896 	struct e1000_context_desc *TXD;
3897 	int hoff, iphlen, thoff, hlen;
3898 	int mss, pktlen, curr_txd;
3899 
3900 #ifdef EMX_TSO_DEBUG
3901 	sc->tso_segments++;
3902 #endif
3903 
3904 	iphlen = mp->m_pkthdr.csum_iphlen;
3905 	thoff = mp->m_pkthdr.csum_thlen;
3906 	hoff = mp->m_pkthdr.csum_lhlen;
3907 	mss = mp->m_pkthdr.tso_segsz;
3908 	pktlen = mp->m_pkthdr.len;
3909 
3910 	if (sc->csum_flags == CSUM_TSO &&
3911 	    sc->csum_iphlen == iphlen &&
3912 	    sc->csum_lhlen == hoff &&
3913 	    sc->csum_thlen == thoff &&
3914 	    sc->csum_mss == mss &&
3915 	    sc->csum_pktlen == pktlen) {
3916 		*txd_upper = sc->csum_txd_upper;
3917 		*txd_lower = sc->csum_txd_lower;
3918 #ifdef EMX_TSO_DEBUG
3919 		sc->tso_ctx_reused++;
3920 #endif
3921 		return 0;
3922 	}
3923 	hlen = hoff + iphlen + thoff;
3924 
3925 	/*
3926 	 * Set up a new TSO context.
3927 	 */
3928 
3929 	curr_txd = sc->next_avail_tx_desc;
3930 	TXD = (struct e1000_context_desc *)&sc->tx_desc_base[curr_txd];
3931 
3932 	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3933 		     E1000_TXD_DTYP_D |		/* Data descr type */
3934 		     E1000_TXD_CMD_TSE;		/* Do TSE on this packet */
3935 
3936 	/* IP and/or TCP header checksum calculation and insertion. */
3937 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3938 
3939 	/*
3940 	 * Start offset for header checksum calculation.
3941 	 * End offset for header checksum calculation.
3942 	 * Offset of place to put the checksum.
3943 	 */
3944 	TXD->lower_setup.ip_fields.ipcss = hoff;
3945 	TXD->lower_setup.ip_fields.ipcse = htole16(hoff + iphlen - 1);
3946 	TXD->lower_setup.ip_fields.ipcso = hoff + offsetof(struct ip, ip_sum);
3947 
3948 	/*
3949 	 * Start offset for payload checksum calculation.
3950 	 * End offset for payload checksum calculation.
3951 	 * Offset of place to put the checksum.
3952 	 */
3953 	TXD->upper_setup.tcp_fields.tucss = hoff + iphlen;
3954 	TXD->upper_setup.tcp_fields.tucse = 0;
3955 	TXD->upper_setup.tcp_fields.tucso =
3956 	    hoff + iphlen + offsetof(struct tcphdr, th_sum);
3957 
3958 	/*
3959 	 * Payload size per packet w/o any headers.
3960 	 * Length of all headers up to payload.
3961 	 */
3962 	TXD->tcp_seg_setup.fields.mss = htole16(mss);
3963 	TXD->tcp_seg_setup.fields.hdr_len = hlen;
3964 	TXD->cmd_and_length = htole32(E1000_TXD_CMD_IFCS |
3965 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3966 				E1000_TXD_CMD_TSE |	/* TSE context */
3967 				E1000_TXD_CMD_IP |	/* Do IP csum */
3968 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3969 				(pktlen - hlen));	/* Total len */
3970 
3971 	/* Save the information for this TSO context */
3972 	sc->csum_flags = CSUM_TSO;
3973 	sc->csum_lhlen = hoff;
3974 	sc->csum_iphlen = iphlen;
3975 	sc->csum_thlen = thoff;
3976 	sc->csum_mss = mss;
3977 	sc->csum_pktlen = pktlen;
3978 	sc->csum_txd_upper = *txd_upper;
3979 	sc->csum_txd_lower = *txd_lower;
3980 
3981 	if (++curr_txd == sc->num_tx_desc)
3982 		curr_txd = 0;
3983 
3984 	KKASSERT(sc->num_tx_desc_avail > 0);
3985 	sc->num_tx_desc_avail--;
3986 
3987 	sc->next_avail_tx_desc = curr_txd;
3988 	return 1;
3989 }
3990