xref: /dflybsd-src/sys/dev/netif/em/if_em.c (revision efda3bd00c039d6845508b47bb18d1687c72154e)
1 /*
2  *
3  * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
4  *
5  * Copyright (c) 2001-2006, Intel Corporation
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  *  1. Redistributions of source code must retain the above copyright notice,
12  *     this list of conditions and the following disclaimer.
13  *
14  *  2. Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *
18  *  3. Neither the name of the Intel Corporation nor the names of its
19  *     contributors may be used to endorse or promote products derived from
20  *     this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  *
34  *
35  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
36  *
37  * This code is derived from software contributed to The DragonFly Project
38  * by Matthew Dillon <dillon@backplane.com>
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  *
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in
48  *    the documentation and/or other materials provided with the
49  *    distribution.
50  * 3. Neither the name of The DragonFly Project nor the names of its
51  *    contributors may be used to endorse or promote products derived
52  *    from this software without specific, prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
57  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
58  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
59  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
60  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
61  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
62  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
63  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
64  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  *
67  * $DragonFly: src/sys/dev/netif/em/if_em.c,v 1.48 2006/09/05 00:55:40 dillon Exp $
68  * $FreeBSD$
69  */
70 /*
71  * SERIALIZATION API RULES:
72  *
73  * - If the driver uses the same serializer for the interrupt as for the
74  *   ifnet, most of the serialization will be done automatically for the
75  *   driver.
76  *
77  * - ifmedia entry points will be serialized by the ifmedia code using the
78  *   ifnet serializer.
79  *
80  * - if_* entry points except for if_input will be serialized by the IF
81  *   and protocol layers.
82  *
83  * - The device driver must be sure to serialize access from timeout code
84  *   installed by the device driver.
85  *
86  * - The device driver typically holds the serializer at the time it wishes
87  *   to call if_input.  If so, it should pass the serializer to if_input and
88  *   note that the serializer might be dropped temporarily by if_input
89  *   (e.g. in case it has to bridge the packet to another interface).
90  *
91  *   NOTE!  Since callers into the device driver hold the ifnet serializer,
92  *   the device driver may be holding a serializer at the time it calls
93  *   if_input even if it is not serializer-aware.
94  */
95 
96 #include "opt_polling.h"
97 
98 #include <dev/netif/em/if_em.h>
99 #include <net/ifq_var.h>
100 
/*********************************************************************
 *  Set this to one to display debug statistics
 *
 *  Not declared static, so the symbol has external linkage.
 *********************************************************************/
int             em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *
 *  em_probe() appends this string to the branding string when it
 *  builds the device description.
 *********************************************************************/

char em_driver_version[] = "6.1.4";
111 
112 
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *
 *  em_probe() walks this table until it reaches an entry whose vendor
 *  ID is 0.  A SubVendor/SubDevice of PCI_ANY_ID matches any value.
 *  Every entry currently uses string index 0.
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	/* 82540 */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82541 */
	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82542 */
	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82543 */
	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82544 */
	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82545 */
	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82546 */
	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82547 */
	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82571 */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82572 */
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82573 */
	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 80003ES2LAN */
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	/* ICH8 */
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/*
	 * NOTE(review): the next two entries use raw device IDs with no
	 * symbolic E1000_DEV_ID_* name.  Presumably they are further
	 * 8254x variants -- confirm against the e1000 hardware header
	 * and switch to the symbolic names if they exist.
	 */
	{ 0x8086, 0x101A, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, 0x1014, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
201 
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  Indexed by the last field of each em_vendor_info_array entry;
 *  em_probe() uses the selected string as the start of the device
 *  description.
 *********************************************************************/

static const char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
209 
/*********************************************************************
 *  Function prototypes
 *
 *  All driver functions are file-local (static); they are reached
 *  from outside through the method table and the ifnet callbacks.
 *********************************************************************/
/* Device (newbus) entry points, wired up in em_methods[] */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
/* Interrupt handler and ifnet/ifmedia callbacks */
static void	em_intr(void *);
static void	em_start(struct ifnet *);
static int	em_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
/* Hardware identification, resource and ring setup/teardown */
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resource(device_t);
static void	em_free_pci_resource(device_t);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
/* Transmit/receive data path helpers */
static void	em_clean_transmit_interrupts(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static void	em_process_receive_interrupts(struct adapter *, int);
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
				    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
					   uint32_t *, uint32_t *);
/* Receive filter, VLAN and reporting helpers */
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_print_link_status(struct adapter *);
static int	em_get_buf(int i, struct adapter *, struct mbuf *, int how);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf *);
static void	em_smartspeed(struct adapter *);
/* 82547-specific helpers */
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static void	em_82547_move_tail_serialized(struct adapter *);
/* DMA memory, debugging and sysctl helpers */
static int	em_dma_malloc(struct adapter *, bus_size_t,
			      struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(uint64_t address, uint32_t length,
				   PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_throttle(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
					const char *,
					struct em_int_delay_info *, int, int);
277 
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  em_methods[] binds the generic device_* methods to this driver's
 *  probe/attach/detach/shutdown functions.
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	/* table terminator */
	{0, 0}
};

/*
 * Driver description: driver name "em", its method table, and the
 * size of the per-device softc (struct adapter) that the functions in
 * this file obtain with device_get_softc().
 */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;

/* Register the driver as module if_em, naming pci as its bus. */
DECLARE_DUMMY_MODULE(if_em);
DRIVER_MODULE(if_em, pci, em_driver, em_devclass, 0, 0);
299 
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Convert between interrupt-delay register ticks and microseconds.
 * Per the arithmetic below one tick is 1.024 usec; the added constants
 * (+500, +512) round the integer division to the nearest value.
 */
#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)

/*
 * Interrupt delay defaults, kept in microseconds; em_attach() passes
 * them to em_add_int_delay_sysctl() as the initial values.
 */
static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
/* NOTE(review): presumably a ceiling in interrupts per second -- confirm */
static int em_int_throttle_ceil = 10000;
/*
 * Requested RX/TX descriptor counts; em_attach() validates them and
 * falls back to the EM_DEFAULT_* values when they are out of range.
 */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

/* Bind each hw.em.* tunable name to the variable it overrides. */
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.int_throttle_ceil", &em_int_throttle_ceil);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
324 
/*
 * Kernel trace for characterization of operations
 *
 * KTR_IF_EM defaults to KTR_ALL unless it was already defined.  Each
 * KTR_INFO line declares one trace point named if_em_<event>.
 */
#if !defined(KTR_IF_EM)
#define KTR_IF_EM	KTR_ALL
#endif
KTR_INFO_MASTER(if_em);
KTR_INFO(KTR_IF_EM, if_em, intr_beg, 0, "intr begin", 0);
KTR_INFO(KTR_IF_EM, if_em, intr_end, 1, "intr end", 0);
KTR_INFO(KTR_IF_EM, if_em, poll_beg, 2, "poll begin", 0);
KTR_INFO(KTR_IF_EM, if_em, poll_end, 3, "poll end", 0);
KTR_INFO(KTR_IF_EM, if_em, pkt_receive, 4, "rx packet", 0);
KTR_INFO(KTR_IF_EM, if_em, pkt_txqueue, 5, "tx packet", 0);
KTR_INFO(KTR_IF_EM, if_em, pkt_txclean, 6, "tx clean", 0);
/* logif(event) logs the trace point if_em_<event>, e.g. logif(pkt_txqueue). */
#define logif(name)	KTR_LOG(if_em_ ## name)
340 
341 /*********************************************************************
342  *  Device identification routine
343  *
344  *  em_probe determines if the driver should be loaded on
345  *  adapter based on PCI vendor/device id of the adapter.
346  *
347  *  return 0 on success, positive on failure
348  *********************************************************************/
349 
350 static int
351 em_probe(device_t dev)
352 {
353 	em_vendor_info_t *ent;
354 
355 	uint16_t pci_vendor_id = 0;
356 	uint16_t pci_device_id = 0;
357 	uint16_t pci_subvendor_id = 0;
358 	uint16_t pci_subdevice_id = 0;
359 	char adapter_name[60];
360 
361 	INIT_DEBUGOUT("em_probe: begin");
362 
363 	pci_vendor_id = pci_get_vendor(dev);
364 	if (pci_vendor_id != EM_VENDOR_ID)
365 		return(ENXIO);
366 
367 	pci_device_id = pci_get_device(dev);
368 	pci_subvendor_id = pci_get_subvendor(dev);
369 	pci_subdevice_id = pci_get_subdevice(dev);
370 
371 	ent = em_vendor_info_array;
372 	while (ent->vendor_id != 0) {
373 		if ((pci_vendor_id == ent->vendor_id) &&
374 		    (pci_device_id == ent->device_id) &&
375 
376 		    ((pci_subvendor_id == ent->subvendor_id) ||
377 		     (ent->subvendor_id == PCI_ANY_ID)) &&
378 
379 		    ((pci_subdevice_id == ent->subdevice_id) ||
380 		     (ent->subdevice_id == PCI_ANY_ID))) {
381 			snprintf(adapter_name, sizeof(adapter_name),
382 				 "%s, Version - %s",  em_strings[ent->index],
383 				 em_driver_version);
384 			device_set_desc_copy(dev, adapter_name);
385 			return(0);
386 		}
387 		ent++;
388 	}
389 
390 	return(ENXIO);
391 }
392 
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  Order of operations: sysctl tree, hardware identification,
 *  descriptor-count validation, software defaults, PCI resources,
 *  descriptor ring DMA memory, hardware init, MAC address, ifnet
 *  setup, and finally the interrupt handler.
 *
 *  Every failure except the sysctl-tree one goes through the "fail"
 *  label, which calls em_detach() to release what has been set up.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter *adapter;
	int tsize, rsize;
	int error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);

	callout_init(&adapter->timer);
	callout_init(&adapter->tx_fifo_timer);

	adapter->dev = dev;
	adapter->osdep.dev = dev;

	/* SYSCTL stuff: per-device node under hw, named by device_get_nameunit() */
	sysctl_ctx_init(&adapter->sysctl_ctx);
	adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
					       SYSCTL_STATIC_CHILDREN(_hw),
					       OID_AUTO,
					       device_get_nameunit(dev),
					       CTLFLAG_RD,
					       0, "");

	/* Nothing else has been set up yet, so return without the fail path. */
	if (adapter->sysctl_tree == NULL) {
		device_printf(dev, "Unable to create sysctl tree\n");
		return EIO;
	}

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
			SYSCTL_CHILDREN(adapter->sysctl_tree),
			OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW,
			(void *)adapter, 0,
			em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
			SYSCTL_CHILDREN(adapter->sysctl_tree),
			OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW,
			(void *)adapter, 0,
			em_sysctl_stats, "I", "Statistics");

	/*
	 * Determine hardware revision.  The checks on hw.mac_type below
	 * come after this call; presumably it is what fills in mac_type.
	 */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
				"receive interrupt delay in usecs",
				&adapter->rx_int_delay,
				E1000_REG_OFFSET(&adapter->hw, RDTR),
				em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
				"transmit interrupt delay in usecs",
				&adapter->tx_int_delay,
				E1000_REG_OFFSET(&adapter->hw, TIDV),
				em_tx_int_delay_dflt);
	/* Absolute delays and throttle sysctl only for em_82540 and later. */
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
					"receive interrupt delay limit in usecs",
					&adapter->rx_abs_int_delay,
					E1000_REG_OFFSET(&adapter->hw, RADV),
					em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
					"transmit interrupt delay limit in usecs",
					&adapter->tx_abs_int_delay,
					E1000_REG_OFFSET(&adapter->hw, TADV),
					em_tx_abs_int_delay_dflt);
		SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
			SYSCTL_CHILDREN(adapter->sysctl_tree),
			OID_AUTO, "int_throttle_ceil", CTLTYPE_INT|CTLFLAG_RW,
			adapter, 0, em_sysctl_int_throttle, "I", NULL);
	}

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of EM_DBA_ALIGN (128)
	 *
	 * The alignment test is on the ring size in bytes (count times
	 * descriptor size).  MACs older than em_82544 use the smaller
	 * *_82543 maximum.  An invalid request is replaced by the
	 * default count and reported on the console.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
			      EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else {
		adapter->num_tx_desc = em_txd;
	}

	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
			      EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else {
		adapter->num_rx_desc = em_rxd;
	}

	/* Software defaults for link negotiation and receive buffers. */
	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

	/* EM_MASTER_SLAVE, when defined at build time, overrides the default. */
#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif

	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;

	/* From here on, failures unwind through em_detach() at "fail". */
	error = em_allocate_pci_resource(dev);
	if (error)
		goto fail;

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	/* Ring sizes in bytes, rounded up to a multiple of EM_DBA_ALIGN. */
	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
			 EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_WAITOK)) {
		device_printf(dev, "Unable to allocate TxDescriptor memory\n");
		error = ENOMEM;
		goto fail;
	}
	/* em_detach() frees the ring only when this pointer is non-NULL. */
	adapter->tx_desc_base = (struct em_tx_desc *) adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
			 EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_WAITOK)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto fail;
	}
	/* Same convention as tx_desc_base above. */
	adapter->rx_desc_base = (struct em_rx_desc *) adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto fail;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev,
			      "EEPROM read error while reading mac address\n");
		error = EIO;
		goto fail;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid mac address\n");
		error = EIO;
		goto fail;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);

	/*
	 * Print the link status
	 *
	 * NOTE(review): this tests adapter->link_active, which nothing in
	 * this function assigns -- confirm that an earlier call updates it,
	 * otherwise "N/A" is always printed here.
	 */
	if (adapter->link_active == 1) {
		em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
					&adapter->link_duplex);
		device_printf(dev, "Speed: %d Mbps, Duplex: %s\n",
		    adapter->link_speed,
		    adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half");
	} else {
		device_printf(dev, "Speed: N/A, Duplex:N/A\n");
	}

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw)) {
		device_printf(dev, "PHY reset is blocked due to "
			      "SOL/IDER session.\n");
	}

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
        else
		adapter->pcix_82544 = FALSE;

	/*
	 * Hook up the interrupt handler last, passing the ifnet's
	 * serializer so em_intr shares it with the if_* entry points
	 * (see the serialization rules at the top of this file).
	 */
	error = bus_setup_intr(dev, adapter->res_interrupt, INTR_NETSAFE,
			   em_intr, adapter,
			   &adapter->int_handler_tag,
			   adapter->interface_data.ac_if.if_serializer);
	if (error) {
		device_printf(dev, "Error registering interrupt handler!\n");
		/*
		 * Detach the ifnet here: em_detach() only does so when
		 * device_is_attached() is true, which is presumably not
		 * yet the case during attach -- confirm.
		 */
		ether_ifdetach(&adapter->interface_data.ac_if);
		goto fail;
	}

	INIT_DEBUGOUT("em_attach: end");
	return(0);

fail:
	/* Common unwind: em_detach() releases whatever was set up. */
	em_detach(dev);
	return(error);
}
635 
636 /*********************************************************************
637  *  Device removal routine
638  *
639  *  The detach entry point is called when the driver is being removed.
640  *  This routine stops the adapter and deallocates all the resources
641  *  that were allocated for driver operation.
642  *
643  *  return 0 on success, positive on failure
644  *********************************************************************/
645 
646 static int
647 em_detach(device_t dev)
648 {
649 	struct adapter *adapter = device_get_softc(dev);
650 
651 	INIT_DEBUGOUT("em_detach: begin");
652 
653 	if (device_is_attached(dev)) {
654 		struct ifnet *ifp = &adapter->interface_data.ac_if;
655 
656 		lwkt_serialize_enter(ifp->if_serializer);
657 		adapter->in_detach = 1;
658 		em_stop(adapter);
659 		em_phy_hw_reset(&adapter->hw);
660 		bus_teardown_intr(dev, adapter->res_interrupt,
661 				  adapter->int_handler_tag);
662 		lwkt_serialize_exit(ifp->if_serializer);
663 
664 		ether_ifdetach(ifp);
665 	}
666 	bus_generic_detach(dev);
667 
668 	em_free_pci_resource(dev);
669 
670 	/* Free Transmit Descriptor ring */
671 	if (adapter->tx_desc_base != NULL) {
672 		em_dma_free(adapter, &adapter->txdma);
673 		adapter->tx_desc_base = NULL;
674 	}
675 
676 	/* Free Receive Descriptor ring */
677 	if (adapter->rx_desc_base != NULL) {
678 		em_dma_free(adapter, &adapter->rxdma);
679 		adapter->rx_desc_base = NULL;
680 	}
681 
682 	/* Free sysctl tree */
683 	if (adapter->sysctl_tree != NULL) {
684 		adapter->sysctl_tree = NULL;
685 		sysctl_ctx_free(&adapter->sysctl_ctx);
686 	}
687 
688 	return(0);
689 }
690 
691 /*********************************************************************
692  *
693  *  Shutdown entry point
694  *
695  **********************************************************************/
696 
697 static int
698 em_shutdown(device_t dev)
699 {
700 	struct adapter *adapter = device_get_softc(dev);
701 	struct ifnet *ifp = &adapter->interface_data.ac_if;
702 
703 	lwkt_serialize_enter(ifp->if_serializer);
704 	em_stop(adapter);
705 	lwkt_serialize_exit(ifp->if_serializer);
706 
707 	return(0);
708 }
709 
710 /*********************************************************************
711  *  Transmit entry point
712  *
713  *  em_start is called by the stack to initiate a transmit.
714  *  The driver will remain in this routine as long as there are
715  *  packets to transmit and transmit resources are available.
716  *  In case resources are not available stack is notified and
717  *  the packet is requeued.
718  **********************************************************************/
719 
720 static void
721 em_start(struct ifnet *ifp)
722 {
723 	struct mbuf *m_head;
724 	struct adapter *adapter = ifp->if_softc;
725 
726 	ASSERT_SERIALIZED(ifp->if_serializer);
727 
728 	if (!adapter->link_active)
729 		return;
730 	while (!ifq_is_empty(&ifp->if_snd)) {
731 		m_head = ifq_poll(&ifp->if_snd);
732 
733 		if (m_head == NULL)
734 			break;
735 
736 		logif(pkt_txqueue);
737 		if (em_encap(adapter, m_head)) {
738 			ifp->if_flags |= IFF_OACTIVE;
739 			break;
740 		}
741 		ifq_dequeue(&ifp->if_snd, m_head);
742 
743 		/* Send a copy of the frame to the BPF listener */
744 		BPF_MTAP(ifp, m_head);
745 
746 		/* Set timeout in case hardware has problems transmitting */
747 		ifp->if_timer = EM_TX_TIMEOUT;
748 	}
749 }
750 
751 /*********************************************************************
752  *  Ioctl entry point
753  *
754  *  em_ioctl is called when the user wants to configure the
755  *  interface.
756  *
757  *  return 0 on success, positive on failure
758  **********************************************************************/
759 
760 static int
761 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
762 {
763 	int max_frame_size, mask, error = 0, reinit = 0;
764 	struct ifreq *ifr = (struct ifreq *) data;
765 	struct adapter *adapter = ifp->if_softc;
766 	uint16_t eeprom_data = 0;
767 
768 	ASSERT_SERIALIZED(ifp->if_serializer);
769 
770 	if (adapter->in_detach)
771 		return 0;
772 
773 	switch (command) {
774 	case SIOCSIFMTU:
775 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
776 		switch (adapter->hw.mac_type) {
777 		case em_82573:
778 			/*
779 			 * 82573 only supports jumbo frames
780 			 * if ASPM is disabled.
781 			 */
782 			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3,
783 			    1, &eeprom_data);
784 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
785 				max_frame_size = ETHER_MAX_LEN;
786 				break;
787 			}
788 			/* Allow Jumbo frames */
789 			/* FALLTHROUGH */
790 		case em_82571:
791 		case em_82572:
792 		case em_80003es2lan:	/* Limit Jumbo Frame size */
793 			max_frame_size = 9234;
794 			break;
795 		case em_ich8lan:
796 			/* ICH8 does not support jumbo frames */
797 			max_frame_size = ETHER_MAX_LEN;
798 			break;
799 		default:
800 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
801 			break;
802 		}
803 		if (ifr->ifr_mtu >
804 			max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) {
805 			error = EINVAL;
806 		} else {
807 			ifp->if_mtu = ifr->ifr_mtu;
808 			adapter->hw.max_frame_size =
809 			ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
810 			em_init(adapter);
811 		}
812 		break;
813 	case SIOCSIFFLAGS:
814 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS "
815 			       "(Set Interface Flags)");
816 		if (ifp->if_flags & IFF_UP) {
817 			if (!(ifp->if_flags & IFF_RUNNING))
818 				em_init(adapter);
819 			em_disable_promisc(adapter);
820 			em_set_promisc(adapter);
821 		} else {
822 			if (ifp->if_flags & IFF_RUNNING)
823 				em_stop(adapter);
824 		}
825 		break;
826 	case SIOCADDMULTI:
827 	case SIOCDELMULTI:
828 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
829 		if (ifp->if_flags & IFF_RUNNING) {
830 			em_disable_intr(adapter);
831 			em_set_multi(adapter);
832 			if (adapter->hw.mac_type == em_82542_rev2_0)
833 				em_initialize_receive_unit(adapter);
834 #ifdef DEVICE_POLLING
835 			/* Do not enable interrupt if polling(4) is enabled */
836 			if ((ifp->if_flags & IFF_POLLING) == 0)
837 #endif
838 			em_enable_intr(adapter);
839 		}
840 		break;
841 	case SIOCSIFMEDIA:
842 	case SIOCGIFMEDIA:
843 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA "
844 			       "(Get/Set Interface Media)");
845 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
846 		break;
847 	case SIOCSIFCAP:
848 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
849 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
850 		if (mask & IFCAP_HWCSUM) {
851 			ifp->if_capenable ^= IFCAP_HWCSUM;
852 			reinit = 1;
853 		}
854 		if (mask & IFCAP_VLAN_HWTAGGING) {
855 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
856 			reinit = 1;
857 		}
858 		if (reinit && (ifp->if_flags & IFF_RUNNING))
859 			em_init(adapter);
860 		break;
861 	default:
862 		error = ether_ioctl(ifp, command, data);
863 		break;
864 	}
865 
866 	return(error);
867 }
868 
869 /*********************************************************************
870  *  Watchdog entry point
871  *
872  *  This routine is called whenever hardware quits transmitting.
873  *
874  **********************************************************************/
875 
876 static void
877 em_watchdog(struct ifnet *ifp)
878 {
879 	struct adapter *adapter = ifp->if_softc;
880 
881 	/*
882 	 * If we are in this routine because of pause frames, then
883 	 * don't reset the hardware.
884 	 */
885 	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
886 		ifp->if_timer = EM_TX_TIMEOUT;
887 		return;
888 	}
889 
890 #ifdef foo
891 	if (em_check_for_link(&adapter->hw))
892 #endif
893 		if_printf(ifp, "watchdog timeout -- resetting\n");
894 
895 	ifp->if_flags &= ~IFF_RUNNING;
896 
897 	em_init(adapter);
898 
899 	adapter->watchdog_timeouts++;
900 }
901 
902 /*********************************************************************
903  *  Init entry point
904  *
905  *  This routine is used in two ways. It is used by the stack as
906  *  init entry point in network interface structure. It is also used
907  *  by the driver as a hw/sw initialization routine to get to a
908  *  consistent state.
909  *
910  *  return 0 on success, positive on failure
911  **********************************************************************/
912 
913 static void
914 em_init(void *arg)
915 {
916 	struct adapter *adapter = arg;
917 	uint32_t pba;
918 	struct ifnet *ifp = &adapter->interface_data.ac_if;
919 
920 	INIT_DEBUGOUT("em_init: begin");
921 
922 	em_stop(adapter);
923 
924 	/*
925 	 * Packet Buffer Allocation (PBA)
926 	 * Writing PBA sets the receive portion of the buffer
927 	 * the remainder is used for the transmit buffer.
928 	 *
929 	 * Devices before the 82547 had a Packet Buffer of 64K.
930 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
931 	 * After the 82547 the buffer was reduced to 40K.
932 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
933 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
934 	 */
935 	switch (adapter->hw.mac_type) {
936 	case em_82547:
937 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
938 		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
939 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
940 		else
941 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
942 
943 		adapter->tx_fifo_head = 0;
944 		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
945 		adapter->tx_fifo_size =
946 			(E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
947 		break;
948 	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
949 	case em_82571: /* 82571: Total Packet Buffer is 48K */
950 	case em_82572: /* 82572: Total Packet Buffer is 48K */
951 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
952 		break;
953 	case em_82573: /* 82573: Total Packet Buffer is 32K */
954 		/* Jumbo frames not supported */
955 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
956 		break;
957 	case em_ich8lan:
958 		pba = E1000_PBA_8K;
959 		break;
960 	default:
961 		/* Devices before 82547 had a Packet Buffer of 64K.   */
962 		if(adapter->hw.max_frame_size > EM_RXBUFFER_8192)
963 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
964 		else
965 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
966 	}
967 
968 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
969 	E1000_WRITE_REG(&adapter->hw, PBA, pba);
970 
971 	/* Get the latest mac address, User can use a LAA */
972 	bcopy(adapter->interface_data.ac_enaddr, adapter->hw.mac_addr,
973 	      ETHER_ADDR_LEN);
974 
975 	/* Initialize the hardware */
976 	if (em_hardware_init(adapter)) {
977 		if_printf(ifp, "Unable to initialize the hardware\n");
978 		return;
979 	}
980 
981 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
982 		em_enable_vlans(adapter);
983 
984 	/* Prepare transmit descriptors and buffers */
985 	if (em_setup_transmit_structures(adapter)) {
986 		if_printf(ifp, "Could not setup transmit structures\n");
987 		em_stop(adapter);
988 		return;
989 	}
990 	em_initialize_transmit_unit(adapter);
991 
992 	/* Setup Multicast table */
993 	em_set_multi(adapter);
994 
995 	/* Prepare receive descriptors and buffers */
996 	if (em_setup_receive_structures(adapter)) {
997 		if_printf(ifp, "Could not setup receive structures\n");
998 		em_stop(adapter);
999 		return;
1000 	}
1001 	em_initialize_receive_unit(adapter);
1002 
1003 	/* Don't loose promiscuous settings */
1004 	em_set_promisc(adapter);
1005 
1006 	ifp->if_flags |= IFF_RUNNING;
1007 	ifp->if_flags &= ~IFF_OACTIVE;
1008 
1009 	if (adapter->hw.mac_type >= em_82543) {
1010 		if (ifp->if_capenable & IFCAP_TXCSUM)
1011 			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
1012 		else
1013 			ifp->if_hwassist = 0;
1014 	}
1015 
1016 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1017 	em_clear_hw_cntrs(&adapter->hw);
1018 
1019 #ifdef DEVICE_POLLING
1020 	/* Do not enable interrupt if polling(4) is enabled */
1021 	if (ifp->if_flags & IFF_POLLING)
1022 		em_disable_intr(adapter);
1023 	else
1024 #endif
1025 	em_enable_intr(adapter);
1026 
1027 	/* Don't reset the phy next time init gets called */
1028 	adapter->hw.phy_reset_disable = TRUE;
1029 }
1030 
1031 #ifdef DEVICE_POLLING
1032 
1033 static void
1034 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1035 {
1036 	struct adapter *adapter = ifp->if_softc;
1037 	uint32_t reg_icr;
1038 
1039 	logif(poll_beg);
1040 
1041 	ASSERT_SERIALIZED(ifp->if_serializer);
1042 
1043 	switch(cmd) {
1044 	case POLL_REGISTER:
1045 		em_disable_intr(adapter);
1046 		break;
1047 	case POLL_DEREGISTER:
1048 		em_enable_intr(adapter);
1049 		break;
1050 	case POLL_AND_CHECK_STATUS:
1051 		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
1052 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1053 			callout_stop(&adapter->timer);
1054 			adapter->hw.get_link_status = 1;
1055 			em_check_for_link(&adapter->hw);
1056 			em_print_link_status(adapter);
1057 			callout_reset(&adapter->timer, hz, em_local_timer,
1058 				      adapter);
1059 		}
1060 		/* fall through */
1061 	case POLL_ONLY:
1062 		if (ifp->if_flags & IFF_RUNNING) {
1063 			em_process_receive_interrupts(adapter, count);
1064 			em_clean_transmit_interrupts(adapter);
1065 
1066 			if (!ifq_is_empty(&ifp->if_snd))
1067 				em_start(ifp);
1068 		}
1069 		break;
1070 	}
1071 	logif(poll_end);
1072 }
1073 
1074 #endif /* DEVICE_POLLING */
1075 
1076 /*********************************************************************
1077  *
1078  *  Interrupt Service routine
1079  *
1080  **********************************************************************/
1081 static void
1082 em_intr(void *arg)
1083 {
1084 	uint32_t reg_icr;
1085 	struct ifnet *ifp;
1086 	struct adapter *adapter = arg;
1087 
1088 	ifp = &adapter->interface_data.ac_if;
1089 
1090 	logif(intr_beg);
1091 	ASSERT_SERIALIZED(ifp->if_serializer);
1092 
1093 	reg_icr = E1000_READ_REG(&adapter->hw, ICR);
1094 	if ((adapter->hw.mac_type >= em_82571 &&
1095 	     (reg_icr & E1000_ICR_INT_ASSERTED) == 0) ||
1096 	    reg_icr == 0) {
1097 		logif(intr_end);
1098 		return;
1099 	}
1100 
1101 	if (reg_icr & E1000_ICR_RXO)
1102 		adapter->rx_overruns++;
1103 
1104 	/* Link status change */
1105 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1106 		callout_stop(&adapter->timer);
1107 		adapter->hw.get_link_status = 1;
1108 		em_check_for_link(&adapter->hw);
1109 		em_print_link_status(adapter);
1110 		callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1111 	}
1112 
1113 	/*
1114 	 * note: do not attempt to improve efficiency by looping.  This
1115 	 * only results in unnecessary piecemeal collection of received
1116 	 * packets and unnecessary piecemeal cleanups of the transmit ring.
1117 	 */
1118 	if (ifp->if_flags & IFF_RUNNING) {
1119 		em_process_receive_interrupts(adapter, -1);
1120 		em_clean_transmit_interrupts(adapter);
1121 	}
1122 
1123 	if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_empty(&ifp->if_snd))
1124 		em_start(ifp);
1125 	logif(intr_end);
1126 }
1127 
1128 /*********************************************************************
1129  *
1130  *  Media Ioctl callback
1131  *
1132  *  This routine is called whenever the user queries the status of
1133  *  the interface using ifconfig.
1134  *
1135  **********************************************************************/
1136 static void
1137 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1138 {
1139 	struct adapter * adapter = ifp->if_softc;
1140 	u_char fiber_type = IFM_1000_SX;
1141 
1142 	INIT_DEBUGOUT("em_media_status: begin");
1143 
1144 	ASSERT_SERIALIZED(ifp->if_serializer);
1145 
1146 	em_check_for_link(&adapter->hw);
1147 	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1148 		if (adapter->link_active == 0) {
1149 			em_get_speed_and_duplex(&adapter->hw,
1150 						&adapter->link_speed,
1151 						&adapter->link_duplex);
1152 			adapter->link_active = 1;
1153 		}
1154 	} else {
1155 		if (adapter->link_active == 1) {
1156 			adapter->link_speed = 0;
1157 			adapter->link_duplex = 0;
1158 			adapter->link_active = 0;
1159 		}
1160 	}
1161 
1162 	ifmr->ifm_status = IFM_AVALID;
1163 	ifmr->ifm_active = IFM_ETHER;
1164 
1165 	if (!adapter->link_active)
1166 		return;
1167 
1168 	ifmr->ifm_status |= IFM_ACTIVE;
1169 
1170 	if (adapter->hw.media_type == em_media_type_fiber ||
1171 	    adapter->hw.media_type == em_media_type_internal_serdes) {
1172 		if (adapter->hw.mac_type == em_82545)
1173 			fiber_type = IFM_1000_LX;
1174 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1175 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1176 	} else {
1177 		switch (adapter->link_speed) {
1178 		case 10:
1179 			ifmr->ifm_active |= IFM_10_T;
1180 			break;
1181 		case 100:
1182 			ifmr->ifm_active |= IFM_100_TX;
1183 			break;
1184 		case 1000:
1185 			ifmr->ifm_active |= IFM_1000_T;
1186 			break;
1187 		}
1188 		if (adapter->link_duplex == FULL_DUPLEX)
1189 			ifmr->ifm_active |= IFM_FDX;
1190 		else
1191 			ifmr->ifm_active |= IFM_HDX;
1192 	}
1193 }
1194 
1195 /*********************************************************************
1196  *
1197  *  Media Ioctl callback
1198  *
1199  *  This routine is called when the user changes speed/duplex using
1200  *  media/mediopt option with ifconfig.
1201  *
1202  **********************************************************************/
1203 static int
1204 em_media_change(struct ifnet *ifp)
1205 {
1206 	struct adapter * adapter = ifp->if_softc;
1207 	struct ifmedia  *ifm = &adapter->media;
1208 
1209 	INIT_DEBUGOUT("em_media_change: begin");
1210 
1211 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1212 		return(EINVAL);
1213 
1214 	ASSERT_SERIALIZED(ifp->if_serializer);
1215 
1216 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1217 	case IFM_AUTO:
1218 		adapter->hw.autoneg = DO_AUTO_NEG;
1219 		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1220 		break;
1221 	case IFM_1000_LX:
1222 	case IFM_1000_SX:
1223 	case IFM_1000_T:
1224 		adapter->hw.autoneg = DO_AUTO_NEG;
1225 		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1226 		break;
1227 	case IFM_100_TX:
1228 		adapter->hw.autoneg = FALSE;
1229 		adapter->hw.autoneg_advertised = 0;
1230 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1231 			adapter->hw.forced_speed_duplex = em_100_full;
1232 		else
1233 			adapter->hw.forced_speed_duplex	= em_100_half;
1234 		break;
1235 	case IFM_10_T:
1236 		adapter->hw.autoneg = FALSE;
1237 		adapter->hw.autoneg_advertised = 0;
1238 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1239 			adapter->hw.forced_speed_duplex = em_10_full;
1240 		else
1241 			adapter->hw.forced_speed_duplex	= em_10_half;
1242 		break;
1243 	default:
1244 		if_printf(ifp, "Unsupported media type\n");
1245 	}
1246 	/*
1247 	 * As the speed/duplex settings may have changed we need to
1248 	 * reset the PHY.
1249 	 */
1250 	adapter->hw.phy_reset_disable = FALSE;
1251 
1252 	em_init(adapter);
1253 
1254 	return(0);
1255 }
1256 
1257 static void
1258 em_tx_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize,
1259 	 int error)
1260 {
1261 	struct em_q *q = arg;
1262 
1263 	if (error)
1264 		return;
1265 	KASSERT(nsegs <= EM_MAX_SCATTER,
1266 		("Too many DMA segments returned when mapping tx packet"));
1267 	q->nsegs = nsegs;
1268 	bcopy(seg, q->segs, nsegs * sizeof(seg[0]));
1269 }
1270 
1271 /*********************************************************************
1272  *
1273  *  This routine maps the mbufs to tx descriptors.
1274  *
1275  *  return 0 on success, positive on failure
1276  **********************************************************************/
1277 static int
1278 em_encap(struct adapter *adapter, struct mbuf *m_head)
1279 {
1280 	uint32_t txd_upper;
1281 	uint32_t txd_lower, txd_used = 0, txd_saved = 0;
1282 	int i, j, error;
1283 	uint64_t address;
1284 
1285 	/* For 82544 Workaround */
1286 	DESC_ARRAY desc_array;
1287 	uint32_t array_elements;
1288 	uint32_t counter;
1289 
1290 	struct ifvlan *ifv = NULL;
1291 	struct em_q q;
1292 	struct em_buffer *tx_buffer = NULL, *tx_buffer_map;
1293 	bus_dmamap_t map;
1294 	struct em_tx_desc *current_tx_desc = NULL;
1295 	struct ifnet *ifp = &adapter->interface_data.ac_if;
1296 
1297 	/*
1298 	 * Force a cleanup if number of TX descriptors
1299 	 * available hits the threshold
1300 	 */
1301 	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1302 		em_clean_transmit_interrupts(adapter);
1303 		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1304 			adapter->no_tx_desc_avail1++;
1305 			return(ENOBUFS);
1306 		}
1307 	}
1308 	/*
1309 	 * Map the packet for DMA.
1310 	 */
1311 	tx_buffer_map = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
1312 	map = tx_buffer_map->map;
1313 	error = bus_dmamap_load_mbuf(adapter->txtag, map, m_head, em_tx_cb,
1314 				     &q, BUS_DMA_NOWAIT);
1315 	if (error != 0) {
1316 		adapter->no_tx_dma_setup++;
1317 		return(error);
1318 	}
1319 	KASSERT(q.nsegs != 0, ("em_encap: empty packet"));
1320 
1321 	if (q.nsegs > adapter->num_tx_desc_avail) {
1322 		adapter->no_tx_desc_avail2++;
1323 		error = ENOBUFS;
1324 		goto fail;
1325 	}
1326 
1327 	if (ifp->if_hwassist > 0) {
1328 		em_transmit_checksum_setup(adapter,  m_head,
1329 					   &txd_upper, &txd_lower);
1330 	} else {
1331 		txd_upper = txd_lower = 0;
1332 	}
1333 
1334 	/* Find out if we are in vlan mode */
1335 	if ((m_head->m_flags & (M_PROTO1|M_PKTHDR)) == (M_PROTO1|M_PKTHDR) &&
1336 	    m_head->m_pkthdr.rcvif != NULL &&
1337 	    m_head->m_pkthdr.rcvif->if_type == IFT_L2VLAN)
1338 		ifv = m_head->m_pkthdr.rcvif->if_softc;
1339 
1340 	i = adapter->next_avail_tx_desc;
1341 	if (adapter->pcix_82544) {
1342 		txd_saved = i;
1343 		txd_used = 0;
1344 	}
1345 	for (j = 0; j < q.nsegs; j++) {
1346 		/* If adapter is 82544 and on PCIX bus */
1347 		if(adapter->pcix_82544) {
1348 			array_elements = 0;
1349 			address = htole64(q.segs[j].ds_addr);
1350 			/*
1351 			 * Check the Address and Length combination and
1352 			 * split the data accordingly
1353 			 */
1354 			array_elements = em_fill_descriptors(address,
1355 						htole32(q.segs[j].ds_len),
1356 						&desc_array);
1357 			for (counter = 0; counter < array_elements; counter++) {
1358 				if (txd_used == adapter->num_tx_desc_avail) {
1359 					adapter->next_avail_tx_desc = txd_saved;
1360 					adapter->no_tx_desc_avail2++;
1361 					error = ENOBUFS;
1362 					goto fail;
1363 				}
1364 				tx_buffer = &adapter->tx_buffer_area[i];
1365 				current_tx_desc = &adapter->tx_desc_base[i];
1366 				current_tx_desc->buffer_addr = htole64(
1367 				desc_array.descriptor[counter].address);
1368 				current_tx_desc->lower.data = htole32(
1369 				(adapter->txd_cmd | txd_lower |
1370 				(uint16_t)desc_array.descriptor[counter].length));
1371 				current_tx_desc->upper.data = htole32((txd_upper));
1372 				if (++i == adapter->num_tx_desc)
1373 					i = 0;
1374 
1375 				tx_buffer->m_head = NULL;
1376 				txd_used++;
1377 			}
1378 		} else {
1379 			tx_buffer = &adapter->tx_buffer_area[i];
1380 			current_tx_desc = &adapter->tx_desc_base[i];
1381 
1382 			current_tx_desc->buffer_addr = htole64(q.segs[j].ds_addr);
1383 			current_tx_desc->lower.data = htole32(
1384 				adapter->txd_cmd | txd_lower | q.segs[j].ds_len);
1385 			current_tx_desc->upper.data = htole32(txd_upper);
1386 
1387 			if (++i == adapter->num_tx_desc)
1388 				i = 0;
1389 
1390 			tx_buffer->m_head = NULL;
1391 		}
1392 	}
1393 
1394 	adapter->next_avail_tx_desc = i;
1395 	if (adapter->pcix_82544)
1396 		adapter->num_tx_desc_avail -= txd_used;
1397 	else
1398 		adapter->num_tx_desc_avail -= q.nsegs;
1399 
1400 	if (ifv != NULL) {
1401 		/* Set the vlan id */
1402 		current_tx_desc->upper.fields.special = htole16(ifv->ifv_tag);
1403 
1404 		/* Tell hardware to add tag */
1405 		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1406 	}
1407 
1408 	tx_buffer->m_head = m_head;
1409 	tx_buffer_map->map = tx_buffer->map;
1410 	tx_buffer->map = map;
1411 	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1412 
1413 	/*
1414 	 * Last Descriptor of Packet needs End Of Packet (EOP)
1415 	 */
1416 	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);
1417 
1418 	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1419 			BUS_DMASYNC_PREWRITE);
1420 
1421 	/*
1422 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1423 	 * that this frame is available to transmit.
1424 	 */
1425 	if (adapter->hw.mac_type == em_82547 &&
1426 	    adapter->link_duplex == HALF_DUPLEX) {
1427 		em_82547_move_tail_serialized(adapter);
1428 	} else {
1429 		E1000_WRITE_REG(&adapter->hw, TDT, i);
1430 		if (adapter->hw.mac_type == em_82547) {
1431 			em_82547_update_fifo_head(adapter,
1432 						  m_head->m_pkthdr.len);
1433 		}
1434 	}
1435 
1436 	return(0);
1437 fail:
1438 	bus_dmamap_unload(adapter->txtag, map);
1439 	return error;
1440 }
1441 
1442 /*********************************************************************
1443  *
1444  * 82547 workaround to avoid controller hang in half-duplex environment.
1445  * The workaround is to avoid queuing a large packet that would span
1446  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1447  * in this case. We do that only when FIFO is quiescent.
1448  *
1449  **********************************************************************/
1450 static void
1451 em_82547_move_tail(void *arg)
1452 {
1453 	struct adapter *adapter = arg;
1454 	struct ifnet *ifp = &adapter->interface_data.ac_if;
1455 
1456 	lwkt_serialize_enter(ifp->if_serializer);
1457 	em_82547_move_tail_serialized(adapter);
1458 	lwkt_serialize_exit(ifp->if_serializer);
1459 }
1460 
1461 static void
1462 em_82547_move_tail_serialized(struct adapter *adapter)
1463 {
1464 	uint16_t hw_tdt;
1465 	uint16_t sw_tdt;
1466 	struct em_tx_desc *tx_desc;
1467 	uint16_t length = 0;
1468 	boolean_t eop = 0;
1469 
1470 	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1471 	sw_tdt = adapter->next_avail_tx_desc;
1472 
1473 	while (hw_tdt != sw_tdt) {
1474 		tx_desc = &adapter->tx_desc_base[hw_tdt];
1475 		length += tx_desc->lower.flags.length;
1476 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1477 		if(++hw_tdt == adapter->num_tx_desc)
1478 			hw_tdt = 0;
1479 
1480 		if(eop) {
1481 			if (em_82547_fifo_workaround(adapter, length)) {
1482 				adapter->tx_fifo_wrk_cnt++;
1483 				callout_reset(&adapter->tx_fifo_timer, 1,
1484 					em_82547_move_tail, adapter);
1485 				break;
1486 			}
1487 			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1488 			em_82547_update_fifo_head(adapter, length);
1489 			length = 0;
1490 		}
1491 	}
1492 }
1493 
1494 static int
1495 em_82547_fifo_workaround(struct adapter *adapter, int len)
1496 {
1497 	int fifo_space, fifo_pkt_len;
1498 
1499 	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1500 
1501 	if (adapter->link_duplex == HALF_DUPLEX) {
1502 		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1503 
1504 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1505 			if (em_82547_tx_fifo_reset(adapter))
1506 				return(0);
1507 			else
1508 				return(1);
1509 		}
1510 	}
1511 
1512 	return(0);
1513 }
1514 
1515 static void
1516 em_82547_update_fifo_head(struct adapter *adapter, int len)
1517 {
1518 	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1519 
1520 	/* tx_fifo_head is always 16 byte aligned */
1521 	adapter->tx_fifo_head += fifo_pkt_len;
1522 	if (adapter->tx_fifo_head >= adapter->tx_fifo_size)
1523 		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1524 }
1525 
1526 static int
1527 em_82547_tx_fifo_reset(struct adapter *adapter)
1528 {
1529 	uint32_t tctl;
1530 
1531 	if ( (E1000_READ_REG(&adapter->hw, TDT) ==
1532 	      E1000_READ_REG(&adapter->hw, TDH)) &&
1533 	     (E1000_READ_REG(&adapter->hw, TDFT) ==
1534 	      E1000_READ_REG(&adapter->hw, TDFH)) &&
1535 	     (E1000_READ_REG(&adapter->hw, TDFTS) ==
1536 	      E1000_READ_REG(&adapter->hw, TDFHS)) &&
1537 	     (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1538 
1539 		/* Disable TX unit */
1540 		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1541 		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1542 
1543 		/* Reset FIFO pointers */
1544 		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1545 		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1546 		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1547 		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1548 
1549 		/* Re-enable TX unit */
1550 		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1551 		E1000_WRITE_FLUSH(&adapter->hw);
1552 
1553 		adapter->tx_fifo_head = 0;
1554 		adapter->tx_fifo_reset_cnt++;
1555 
1556 		return(TRUE);
1557 	} else {
1558 		return(FALSE);
1559 	}
1560 }
1561 
1562 static void
1563 em_set_promisc(struct adapter *adapter)
1564 {
1565 	uint32_t reg_rctl;
1566 	struct ifnet *ifp = &adapter->interface_data.ac_if;
1567 
1568 	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1569 
1570 	adapter->em_insert_vlan_header = 0;
1571 	if (ifp->if_flags & IFF_PROMISC) {
1572 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1573 		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1574 
1575 		/*
1576 		 * Disable VLAN stripping in promiscous mode.
1577 		 * This enables bridging of vlan tagged frames to occur
1578 		 * and also allows vlan tags to be seen in tcpdump.
1579 		 */
1580 		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1581 			em_disable_vlans(adapter);
1582 		adapter->em_insert_vlan_header = 1;
1583 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1584 		reg_rctl |= E1000_RCTL_MPE;
1585 		reg_rctl &= ~E1000_RCTL_UPE;
1586 		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1587 	}
1588 }
1589 
1590 static void
1591 em_disable_promisc(struct adapter *adapter)
1592 {
1593 	struct ifnet *ifp = &adapter->interface_data.ac_if;
1594 
1595 	uint32_t reg_rctl;
1596 
1597 	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1598 
1599 	reg_rctl &= (~E1000_RCTL_UPE);
1600 	reg_rctl &= (~E1000_RCTL_MPE);
1601 	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1602 
1603 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1604 		em_enable_vlans(adapter);
1605 	adapter->em_insert_vlan_header = 0;
1606 }
1607 
1608 /*********************************************************************
1609  *  Multicast Update
1610  *
1611  *  This routine is called whenever multicast address list is updated.
1612  *
1613  **********************************************************************/
1614 
1615 static void
1616 em_set_multi(struct adapter *adapter)
1617 {
1618 	uint32_t reg_rctl = 0;
1619 	uint8_t mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1620 	struct ifmultiaddr *ifma;
1621 	int mcnt = 0;
1622 	struct ifnet *ifp = &adapter->interface_data.ac_if;
1623 
1624 	IOCTL_DEBUGOUT("em_set_multi: begin");
1625 
1626 	if (adapter->hw.mac_type == em_82542_rev2_0) {
1627 		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1628 		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1629 			em_pci_clear_mwi(&adapter->hw);
1630 		reg_rctl |= E1000_RCTL_RST;
1631 		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1632 		msec_delay(5);
1633 	}
1634 
1635 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1636 		if (ifma->ifma_addr->sa_family != AF_LINK)
1637 			continue;
1638 
1639 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1640 			break;
1641 
1642 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1643 		      &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1644 		mcnt++;
1645 	}
1646 
1647 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1648 		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1649 		reg_rctl |= E1000_RCTL_MPE;
1650 		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1651 	} else {
1652 		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1653 	}
1654 
1655 	if (adapter->hw.mac_type == em_82542_rev2_0) {
1656 		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1657 		reg_rctl &= ~E1000_RCTL_RST;
1658 		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1659 		msec_delay(5);
1660 		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1661                         em_pci_set_mwi(&adapter->hw);
1662 	}
1663 }
1664 
1665 /*********************************************************************
1666  *  Timer routine
1667  *
1668  *  This routine checks for link status and updates statistics.
1669  *
1670  **********************************************************************/
1671 
1672 static void
1673 em_local_timer(void *arg)
1674 {
1675 	struct ifnet *ifp;
1676 	struct adapter *adapter = arg;
1677 	ifp = &adapter->interface_data.ac_if;
1678 
1679 	lwkt_serialize_enter(ifp->if_serializer);
1680 
1681 	em_check_for_link(&adapter->hw);
1682 	em_print_link_status(adapter);
1683 	em_update_stats_counters(adapter);
1684 	if (em_display_debug_stats && ifp->if_flags & IFF_RUNNING)
1685 		em_print_hw_stats(adapter);
1686 	em_smartspeed(adapter);
1687 
1688 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1689 
1690 	lwkt_serialize_exit(ifp->if_serializer);
1691 }
1692 
1693 static void
1694 em_print_link_status(struct adapter *adapter)
1695 {
1696 	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1697 		if (adapter->link_active == 0) {
1698 			em_get_speed_and_duplex(&adapter->hw,
1699 						&adapter->link_speed,
1700 						&adapter->link_duplex);
1701 			/* Check if we may set SPEED_MODE bit on PCI-E */
1702 			if ((adapter->link_speed == SPEED_1000) &&
1703 			    ((adapter->hw.mac_type == em_82571) ||
1704 			     (adapter->hw.mac_type == em_82572))) {
1705 				int tarc0;
1706 
1707 #define SPEED_MODE_BIT	(1 << 21)	/* On PCI-E MACs only */
1708 
1709 				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1710 				tarc0 |= SPEED_MODE_BIT;
1711 				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1712 
1713 #undef SPEED_MODE_BIT
1714 			}
1715 			if (bootverbose) {
1716 				if_printf(&adapter->interface_data.ac_if,
1717 					  "Link is up %d Mbps %s\n",
1718 					  adapter->link_speed,
1719 					  adapter->link_duplex == FULL_DUPLEX ?
1720 						"Full Duplex" : "Half Duplex");
1721 			}
1722 			adapter->link_active = 1;
1723 			adapter->smartspeed = 0;
1724 		}
1725 	} else {
1726 		if (adapter->link_active == 1) {
1727 			adapter->link_speed = 0;
1728 			adapter->link_duplex = 0;
1729 			if (bootverbose) {
1730 				if_printf(&adapter->interface_data.ac_if,
1731 					  "Link is Down\n");
1732 			}
1733 			adapter->link_active = 0;
1734 		}
1735 	}
1736 }
1737 
1738 /*********************************************************************
1739  *
1740  *  This routine disables all traffic on the adapter by issuing a
1741  *  global reset on the MAC and deallocates TX/RX buffers.
1742  *
1743  **********************************************************************/
1744 
1745 static void
1746 em_stop(void *arg)
1747 {
1748 	struct ifnet   *ifp;
1749 	struct adapter * adapter = arg;
1750 	ifp = &adapter->interface_data.ac_if;
1751 
1752 	ASSERT_SERIALIZED(ifp->if_serializer);
1753 
1754 	INIT_DEBUGOUT("em_stop: begin");
1755 	em_disable_intr(adapter);
1756 	em_reset_hw(&adapter->hw);
1757 	callout_stop(&adapter->timer);
1758 	callout_stop(&adapter->tx_fifo_timer);
1759 	em_free_transmit_structures(adapter);
1760 	em_free_receive_structures(adapter);
1761 
1762 	/* Tell the stack that the interface is no longer active */
1763 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1764 	ifp->if_timer = 0;
1765 }
1766 
1767 /*********************************************************************
1768  *
1769  *  Determine hardware revision.
1770  *
1771  **********************************************************************/
1772 static void
1773 em_identify_hardware(struct adapter * adapter)
1774 {
1775 	device_t dev = adapter->dev;
1776 
1777 	/* Make sure our PCI config space has the necessary stuff set */
1778 	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1779 	if (!((adapter->hw.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1780 	      (adapter->hw.pci_cmd_word & PCIM_CMD_MEMEN))) {
1781 		device_printf(dev, "Memory Access and/or Bus Master bits "
1782 			      "were not set!\n");
1783 		adapter->hw.pci_cmd_word |= (PCIM_CMD_BUSMASTEREN |
1784 					     PCIM_CMD_MEMEN);
1785 		pci_write_config(dev, PCIR_COMMAND,
1786 				 adapter->hw.pci_cmd_word, 2);
1787 	}
1788 
1789 	/* Save off the information about this board */
1790 	adapter->hw.vendor_id = pci_get_vendor(dev);
1791 	adapter->hw.device_id = pci_get_device(dev);
1792 	adapter->hw.revision_id = pci_get_revid(dev);
1793 	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
1794 	adapter->hw.subsystem_id = pci_get_subdevice(dev);
1795 
1796 	/* Identify the MAC */
1797 	if (em_set_mac_type(&adapter->hw))
1798 		device_printf(dev, "Unknown MAC Type\n");
1799 
1800 	if (adapter->hw.mac_type == em_82541 ||
1801 	    adapter->hw.mac_type == em_82541_rev_2 ||
1802 	    adapter->hw.mac_type == em_82547 ||
1803 	    adapter->hw.mac_type == em_82547_rev_2)
1804 		adapter->hw.phy_init_script = TRUE;
1805 }
1806 
1807 static int
1808 em_allocate_pci_resource(device_t dev)
1809 {
1810 	struct adapter *adapter = device_get_softc(dev);
1811 	int rid;
1812 
1813 	rid = EM_MMBA;
1814 	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1815 						     &rid, RF_ACTIVE);
1816 	if (!(adapter->res_memory)) {
1817 		device_printf(dev, "Unable to allocate bus resource: memory\n");
1818 		return ENXIO;
1819 	}
1820 	adapter->osdep.mem_bus_space_tag =
1821 		rman_get_bustag(adapter->res_memory);
1822 	adapter->osdep.mem_bus_space_handle =
1823 	    rman_get_bushandle(adapter->res_memory);
1824 	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1825 
1826 	if (adapter->hw.mac_type > em_82543) {
1827 		int i, val;
1828 
1829 		/* Figure our where our IO BAR is ? */
1830 		rid = EM_MMBA;
1831 		for (i = 0; i < 5; i++) {
1832 			val = pci_read_config(dev, rid, 4);
1833 			if (val & 0x00000001) {
1834 				adapter->io_rid = rid;
1835 				break;
1836 			}
1837 			rid += 4;
1838 		}
1839 
1840 		adapter->res_ioport = bus_alloc_resource_any(dev,
1841 		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
1842 		if (!(adapter->res_ioport)) {
1843 			device_printf(dev, "Unable to allocate bus resource: "
1844 				      "ioport\n");
1845 			return ENXIO;
1846 		}
1847 
1848 		adapter->hw.reg_io_tag = rman_get_bustag(adapter->res_ioport);
1849 		adapter->hw.reg_io_handle =
1850 			rman_get_bushandle(adapter->res_ioport);
1851 	}
1852 
1853 	/* For ICH8 we need to find the flash memory */
1854 	if (adapter->hw.mac_type == em_ich8lan) {
1855 		rid = EM_FLASH;
1856 
1857 		adapter->flash_mem = bus_alloc_resource_any(dev,
1858 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
1859 		if (adapter->flash_mem == NULL) {
1860 			device_printf(dev, "Unable to allocate bus resource: "
1861 				      "flash memory\n");
1862 			return ENXIO;
1863 		}
1864 		adapter->osdep.flash_bus_space_tag =
1865 		    rman_get_bustag(adapter->flash_mem);
1866 		adapter->osdep.flash_bus_space_handle =
1867 		    rman_get_bushandle(adapter->flash_mem);
1868 	}
1869 
1870 	rid = 0x0;
1871 	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ,
1872 	    &rid, RF_SHAREABLE | RF_ACTIVE);
1873 	if (!(adapter->res_interrupt)) {
1874 		device_printf(dev, "Unable to allocate bus resource: "
1875 			      "interrupt\n");
1876 		return ENXIO;
1877 	}
1878 
1879 	adapter->hw.back = &adapter->osdep;
1880 
1881 	return 0;
1882 }
1883 
1884 static void
1885 em_free_pci_resource(device_t dev)
1886 {
1887 	struct adapter *adapter = device_get_softc(dev);
1888 
1889 	if (adapter->res_interrupt != NULL) {
1890 		bus_release_resource(dev, SYS_RES_IRQ, 0,
1891 				     adapter->res_interrupt);
1892 	}
1893 	if (adapter->res_memory != NULL) {
1894 		bus_release_resource(dev, SYS_RES_MEMORY, EM_MMBA,
1895 				     adapter->res_memory);
1896 	}
1897 
1898 	if (adapter->res_ioport != NULL) {
1899 		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
1900 				     adapter->res_ioport);
1901 	}
1902 
1903 	if (adapter->flash_mem != NULL) {
1904 		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
1905 				     adapter->flash_mem);
1906 	}
1907 }
1908 
1909 /*********************************************************************
1910  *
1911  *  Initialize the hardware to a configuration as specified by the
1912  *  adapter structure. The controller is reset, the EEPROM is
1913  *  verified, the MAC address is set, then the shared initialization
1914  *  routines are called.
1915  *
1916  **********************************************************************/
1917 static int
1918 em_hardware_init(struct adapter *adapter)
1919 {
1920 	uint16_t	rx_buffer_size;
1921 
1922 	INIT_DEBUGOUT("em_hardware_init: begin");
1923 	/* Issue a global reset */
1924 	em_reset_hw(&adapter->hw);
1925 
1926 	/* When hardware is reset, fifo_head is also reset */
1927 	adapter->tx_fifo_head = 0;
1928 
1929 	/* Make sure we have a good EEPROM before we read from it */
1930 	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
1931 		device_printf(adapter->dev,
1932 			      "The EEPROM Checksum Is Not Valid\n");
1933 		return(EIO);
1934 	}
1935 
1936 	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
1937 		device_printf(adapter->dev,
1938 			      "EEPROM read error while reading part number\n");
1939 		return(EIO);
1940 	}
1941 
1942 	/* Set up smart power down as default off on newer adapters */
1943 	if (!em_smart_pwr_down &&
1944 	    (adapter->hw.mac_type == em_82571 ||
1945 	     adapter->hw.mac_type == em_82572)) {
1946 		uint16_t phy_tmp = 0;
1947 
1948 		/* Speed up time to link by disabling smart power down */
1949 		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT,
1950 				&phy_tmp);
1951 		phy_tmp &= ~IGP02E1000_PM_SPD;
1952 		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT,
1953 				 phy_tmp);
1954 	}
1955 
1956 	/*
1957 	 * These parameters control the automatic generation (Tx) and
1958 	 * response(Rx) to Ethernet PAUSE frames.
1959 	 * - High water mark should allow for at least two frames to be
1960 	 *   received after sending an XOFF.
1961 	 * - Low water mark works best when it is very near the high water mark.
1962 	 *   This allows the receiver to restart by sending XON when it has
1963 	 *   drained a bit.  Here we use an arbitary value of 1500 which will
1964 	 *   restart after one full frame is pulled from the buffer.  There
1965 	 *   could be several smaller frames in the buffer and if so they will
1966 	 *   not trigger the XON until their total number reduces the buffer
1967 	 *   by 1500.
1968 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1969 	 */
1970 	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
1971 
1972 	adapter->hw.fc_high_water =
1973 	    rx_buffer_size - roundup2(1 * adapter->hw.max_frame_size, 1024);
1974 	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
1975 	if (adapter->hw.mac_type == em_80003es2lan)
1976 		adapter->hw.fc_pause_time = 0xFFFF;
1977 	else
1978 		adapter->hw.fc_pause_time = 0x1000;
1979 	adapter->hw.fc_send_xon = TRUE;
1980 	adapter->hw.fc = em_fc_full;
1981 
1982 	if (em_init_hw(&adapter->hw) < 0) {
1983 		device_printf(adapter->dev, "Hardware Initialization Failed");
1984 		return(EIO);
1985 	}
1986 
1987 	em_check_for_link(&adapter->hw);
1988 	/*
1989 	 * At the time this code runs copper NICS fail, but fiber
1990 	 * succeed, however, this causes a problem downstream,
1991 	 * so for now have fiber NICs just not do this, then
1992 	 * everything seems to work correctly.
1993 	 */
1994 	if ((adapter->hw.media_type != em_media_type_fiber &&
1995 	     adapter->hw.media_type != em_media_type_internal_serdes) &&
1996 	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU))
1997 		adapter->link_active = 1;
1998 	else
1999 		adapter->link_active = 0;
2000 
2001 	if (adapter->link_active) {
2002 		em_get_speed_and_duplex(&adapter->hw,
2003 					&adapter->link_speed,
2004 					&adapter->link_duplex);
2005 	} else {
2006 		adapter->link_speed = 0;
2007 		adapter->link_duplex = 0;
2008 	}
2009 
2010 	return(0);
2011 }
2012 
2013 /*********************************************************************
2014  *
2015  *  Setup networking device structure and register an interface.
2016  *
2017  **********************************************************************/
2018 static void
2019 em_setup_interface(device_t dev, struct adapter *adapter)
2020 {
2021 	struct ifnet   *ifp;
2022 	u_char fiber_type = IFM_1000_SX;	/* default type */
2023 	INIT_DEBUGOUT("em_setup_interface: begin");
2024 
2025 	ifp = &adapter->interface_data.ac_if;
2026 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2027 	ifp->if_mtu = ETHERMTU;
2028 	ifp->if_baudrate = 1000000000;
2029 	ifp->if_init =  em_init;
2030 	ifp->if_softc = adapter;
2031 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2032 	ifp->if_ioctl = em_ioctl;
2033 	ifp->if_start = em_start;
2034 #ifdef DEVICE_POLLING
2035 	ifp->if_poll = em_poll;
2036 #endif
2037 	ifp->if_watchdog = em_watchdog;
2038 	ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2039 	ifq_set_ready(&ifp->if_snd);
2040 
2041 	if (adapter->hw.mac_type >= em_82543)
2042 		ifp->if_capabilities |= IFCAP_HWCSUM;
2043 
2044 	ifp->if_capenable = ifp->if_capabilities;
2045 
2046 	ether_ifattach(ifp, adapter->hw.mac_addr, NULL);
2047 
2048 	/*
2049 	 * Tell the upper layer(s) we support long frames.
2050 	 */
2051 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2052         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2053 
2054 	/*
2055 	 * Specify the media types supported by this adapter and register
2056 	 * callbacks to update media and link information
2057 	 */
2058 	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2059 		     em_media_status);
2060 	if (adapter->hw.media_type == em_media_type_fiber ||
2061 	    adapter->hw.media_type == em_media_type_internal_serdes) {
2062 		if (adapter->hw.mac_type == em_82545)
2063 			fiber_type = IFM_1000_LX;
2064 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2065 			    0, NULL);
2066 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type,
2067 			    0, NULL);
2068 	} else {
2069 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2070 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2071 			    0, NULL);
2072 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2073 			    0, NULL);
2074 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2075 			    0, NULL);
2076 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
2077 			    0, NULL);
2078 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2079 	}
2080 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2081 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2082 }
2083 
2084 /*********************************************************************
2085  *
2086  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2087  *
2088  **********************************************************************/
2089 static void
2090 em_smartspeed(struct adapter *adapter)
2091 {
2092 	uint16_t phy_tmp;
2093 
2094 	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2095 	    !adapter->hw.autoneg ||
2096 	    !(adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2097 		return;
2098 
2099 	if (adapter->smartspeed == 0) {
2100 		/*
2101 		 * If Master/Slave config fault is asserted twice,
2102 		 * we assume back-to-back.
2103 		 */
2104 		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2105 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2106 			return;
2107 		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2108 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2109 			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2110 					&phy_tmp);
2111 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2112 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2113 				em_write_phy_reg(&adapter->hw,
2114 						 PHY_1000T_CTRL, phy_tmp);
2115 				adapter->smartspeed++;
2116 				if (adapter->hw.autoneg &&
2117 				    !em_phy_setup_autoneg(&adapter->hw) &&
2118 				    !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2119 						     &phy_tmp)) {
2120 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2121 						    MII_CR_RESTART_AUTO_NEG);
2122 					em_write_phy_reg(&adapter->hw,
2123 							 PHY_CTRL, phy_tmp);
2124 				}
2125 			}
2126 		}
2127                 return;
2128 	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2129 		/* If still no link, perhaps using 2/3 pair cable */
2130 		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2131 		phy_tmp |= CR_1000T_MS_ENABLE;
2132 		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2133 		if (adapter->hw.autoneg &&
2134 		    !em_phy_setup_autoneg(&adapter->hw) &&
2135 		    !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2136 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2137 				    MII_CR_RESTART_AUTO_NEG);
2138 			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2139 		}
2140 	}
2141 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2142 	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2143 		adapter->smartspeed = 0;
2144 }
2145 
2146 /*
2147  * Manage DMA'able memory.
2148  */
2149 static void
2150 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2151 {
2152 	if (error)
2153 		return;
2154 	*(bus_addr_t*) arg = segs->ds_addr;
2155 }
2156 
2157 static int
2158 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2159 	      struct em_dma_alloc *dma, int mapflags)
2160 {
2161 	int r;
2162 	device_t dev = adapter->dev;
2163 
2164 	r = bus_dma_tag_create(NULL,                    /* parent */
2165 			       PAGE_SIZE, 0,            /* alignment, bounds */
2166 			       BUS_SPACE_MAXADDR,       /* lowaddr */
2167 			       BUS_SPACE_MAXADDR,       /* highaddr */
2168 			       NULL, NULL,              /* filter, filterarg */
2169 			       size,                    /* maxsize */
2170 			       1,                       /* nsegments */
2171 			       size,                    /* maxsegsize */
2172 			       BUS_DMA_ALLOCNOW,        /* flags */
2173 			       &dma->dma_tag);
2174 	if (r != 0) {
2175 		device_printf(dev, "em_dma_malloc: bus_dma_tag_create failed; "
2176 			      "error %u\n", r);
2177 		goto fail_0;
2178 	}
2179 
2180 	r = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2181 			     BUS_DMA_NOWAIT, &dma->dma_map);
2182 	if (r != 0) {
2183 		device_printf(dev, "em_dma_malloc: bus_dmammem_alloc failed; "
2184 			      "size %llu, error %d\n", (uintmax_t)size, r);
2185 		goto fail_2;
2186 	}
2187 
2188 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2189 			    size,
2190 			    em_dmamap_cb,
2191 			    &dma->dma_paddr,
2192 			    mapflags | BUS_DMA_NOWAIT);
2193 	if (r != 0) {
2194 		device_printf(dev, "em_dma_malloc: bus_dmamap_load failed; "
2195 			      "error %u\n", r);
2196 		goto fail_3;
2197 	}
2198 
2199 	dma->dma_size = size;
2200 	return(0);
2201 
2202 fail_3:
2203 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2204 fail_2:
2205 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2206 	bus_dma_tag_destroy(dma->dma_tag);
2207 fail_0:
2208 	dma->dma_map = NULL;
2209 	dma->dma_tag = NULL;
2210 	return(r);
2211 }
2212 
2213 static void
2214 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2215 {
2216 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2217 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2218 	bus_dma_tag_destroy(dma->dma_tag);
2219 }
2220 
2221 /*********************************************************************
2222  *
2223  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2224  *  the information needed to transmit a packet on the wire.
2225  *
2226  **********************************************************************/
2227 static int
2228 em_allocate_transmit_structures(struct adapter * adapter)
2229 {
2230 	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2231 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2232 	if (adapter->tx_buffer_area == NULL) {
2233 		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2234 		return(ENOMEM);
2235 	}
2236 
2237 	return(0);
2238 }
2239 
2240 /*********************************************************************
2241  *
2242  *  Allocate and initialize transmit structures.
2243  *
2244  **********************************************************************/
2245 static int
2246 em_setup_transmit_structures(struct adapter *adapter)
2247 {
2248 	struct em_buffer *tx_buffer;
2249 	bus_size_t size;
2250 	int error, i;
2251 
2252 	/*
2253 	 * Setup DMA descriptor areas.
2254 	 */
2255 	size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2256 	if (bus_dma_tag_create(NULL,                    /* parent */
2257 			       1, 0,			/* alignment, bounds */
2258 			       BUS_SPACE_MAXADDR,       /* lowaddr */
2259 			       BUS_SPACE_MAXADDR,       /* highaddr */
2260 			       NULL, NULL,              /* filter, filterarg */
2261 			       size,                    /* maxsize */
2262 			       EM_MAX_SCATTER,          /* nsegments */
2263 			       size,                    /* maxsegsize */
2264 			       BUS_DMA_ALLOCNOW,        /* flags */
2265 			       &adapter->txtag)) {
2266 		device_printf(adapter->dev, "Unable to allocate TX DMA tag\n");
2267 		return(ENOMEM);
2268 	}
2269 
2270 	if (em_allocate_transmit_structures(adapter))
2271 		return(ENOMEM);
2272 
2273         bzero((void *) adapter->tx_desc_base,
2274               (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2275 	tx_buffer = adapter->tx_buffer_area;
2276 	for (i = 0; i < adapter->num_tx_desc; i++) {
2277 		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2278 		if (error) {
2279 			device_printf(adapter->dev,
2280 				      "Unable to create TX DMA map\n");
2281 			goto fail;
2282 		}
2283 		tx_buffer++;
2284 	}
2285 
2286         adapter->next_avail_tx_desc = 0;
2287 	adapter->oldest_used_tx_desc = 0;
2288 
2289 	/* Set number of descriptors available */
2290 	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2291 
2292 	/* Set checksum context */
2293 	adapter->active_checksum_context = OFFLOAD_NONE;
2294 
2295 	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2296 			BUS_DMASYNC_PREWRITE);
2297 
2298 	return(0);
2299 fail:
2300 	em_free_transmit_structures(adapter);
2301 	return (error);
2302 }
2303 
2304 /*********************************************************************
2305  *
2306  *  Enable transmit unit.
2307  *
2308  **********************************************************************/
2309 static void
2310 em_initialize_transmit_unit(struct adapter * adapter)
2311 {
2312 	uint32_t reg_tctl, reg_tarc;
2313 	uint32_t reg_tipg = 0;
2314 	uint64_t bus_addr;
2315 
2316 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2317 
2318 	/* Setup the Base and Length of the Tx Descriptor Ring */
2319 	bus_addr = adapter->txdma.dma_paddr;
2320 	E1000_WRITE_REG(&adapter->hw, TDLEN,
2321 			adapter->num_tx_desc * sizeof(struct em_tx_desc));
2322 	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2323 	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2324 
2325 	/* Setup the HW Tx Head and Tail descriptor pointers */
2326 	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2327 	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2328 
2329 	HW_DEBUGOUT2("Base = %x, Length = %x\n",
2330 		     E1000_READ_REG(&adapter->hw, TDBAL),
2331 		     E1000_READ_REG(&adapter->hw, TDLEN));
2332 
2333 	/* Set the default values for the Tx Inter Packet Gap timer */
2334 	switch (adapter->hw.mac_type) {
2335 	case em_82542_rev2_0:
2336 	case em_82542_rev2_1:
2337 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2338 		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2339 		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2340 		break;
2341 	case em_80003es2lan:
2342 		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2343 		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2344 		break;
2345 	default:
2346 		if (adapter->hw.media_type == em_media_type_fiber ||
2347 		    adapter->hw.media_type == em_media_type_internal_serdes)
2348 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2349 		else
2350 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2351 		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2352 		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2353 	}
2354 
2355 	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2356 	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2357 	if (adapter->hw.mac_type >= em_82540)
2358 		E1000_WRITE_REG(&adapter->hw, TADV,
2359 				adapter->tx_abs_int_delay.value);
2360 
2361 	/* Do adapter specific tweaks before we enable the transmitter */
2362 	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2363 		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2364 		reg_tarc |= (1 << 25);
2365 		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2366 		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2367 		reg_tarc |= (1 << 25);
2368 		reg_tarc &= ~(1 << 28);
2369 		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2370 	} else if (adapter->hw.mac_type == em_80003es2lan) {
2371 		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2372 		reg_tarc |= 1;
2373 		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2374 		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2375 		reg_tarc |= 1;
2376 		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2377 	}
2378 
2379 	/* Program the Transmit Control Register */
2380 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2381 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2382 	if (adapter->hw.mac_type >= em_82571)
2383 		reg_tctl |= E1000_TCTL_MULR;
2384 	if (adapter->link_duplex == 1)
2385 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2386 	else
2387 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2388 
2389 	/* This write will effectively turn on the transmit unit */
2390 	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2391 
2392 	/* Setup Transmit Descriptor Settings for this adapter */
2393 	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2394 
2395 	if (adapter->tx_int_delay.value > 0)
2396 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2397 }
2398 
2399 /*********************************************************************
2400  *
2401  *  Free all transmit related data structures.
2402  *
2403  **********************************************************************/
2404 static void
2405 em_free_transmit_structures(struct adapter * adapter)
2406 {
2407 	struct em_buffer *tx_buffer;
2408 	int i;
2409 
2410 	INIT_DEBUGOUT("free_transmit_structures: begin");
2411 
2412 	if (adapter->tx_buffer_area != NULL) {
2413 		tx_buffer = adapter->tx_buffer_area;
2414 		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2415 			if (tx_buffer->m_head != NULL) {
2416 				bus_dmamap_unload(adapter->txtag,
2417 						  tx_buffer->map);
2418 				m_freem(tx_buffer->m_head);
2419 			}
2420 
2421 			if (tx_buffer->map != NULL) {
2422 				bus_dmamap_destroy(adapter->txtag, tx_buffer->map);
2423 				tx_buffer->map = NULL;
2424  			}
2425 			tx_buffer->m_head = NULL;
2426 		}
2427 	}
2428 	if (adapter->tx_buffer_area != NULL) {
2429 		kfree(adapter->tx_buffer_area, M_DEVBUF);
2430 		adapter->tx_buffer_area = NULL;
2431 	}
2432 	if (adapter->txtag != NULL) {
2433 		bus_dma_tag_destroy(adapter->txtag);
2434 		adapter->txtag = NULL;
2435 	}
2436 }
2437 
2438 /*********************************************************************
2439  *
2440  *  The offload context needs to be set when we transfer the first
2441  *  packet of a particular protocol (TCP/UDP). We change the
2442  *  context only if the protocol type changes.
2443  *
2444  **********************************************************************/
2445 static void
2446 em_transmit_checksum_setup(struct adapter * adapter,
2447 			   struct mbuf *mp,
2448 			   uint32_t *txd_upper,
2449 			   uint32_t *txd_lower)
2450 {
2451 	struct em_context_desc *TXD;
2452 	struct em_buffer *tx_buffer;
2453 	int curr_txd;
2454 
2455 	if (mp->m_pkthdr.csum_flags) {
2456 		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2457 			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2458 			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2459 			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2460 				return;
2461 			else
2462 				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2463 		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2464 			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2465 			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2466 			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2467 				return;
2468 			else
2469 				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2470 		} else {
2471 			*txd_upper = 0;
2472 			*txd_lower = 0;
2473 			return;
2474 		}
2475 	} else {
2476 		*txd_upper = 0;
2477 		*txd_lower = 0;
2478 		return;
2479 	}
2480 
2481 	/*
2482 	 * If we reach this point, the checksum offload context
2483 	 * needs to be reset.
2484 	 */
2485 	curr_txd = adapter->next_avail_tx_desc;
2486 	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2487 	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2488 
2489 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2490 	TXD->lower_setup.ip_fields.ipcso =
2491 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2492 	TXD->lower_setup.ip_fields.ipcse =
2493 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2494 
2495 	TXD->upper_setup.tcp_fields.tucss =
2496 	    ETHER_HDR_LEN + sizeof(struct ip);
2497 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2498 
2499 	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2500 		TXD->upper_setup.tcp_fields.tucso =
2501 		    ETHER_HDR_LEN + sizeof(struct ip) +
2502 		    offsetof(struct tcphdr, th_sum);
2503 	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2504 		TXD->upper_setup.tcp_fields.tucso =
2505 			ETHER_HDR_LEN + sizeof(struct ip) +
2506 			offsetof(struct udphdr, uh_sum);
2507 	}
2508 
2509 	TXD->tcp_seg_setup.data = htole32(0);
2510 	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2511 
2512 	tx_buffer->m_head = NULL;
2513 
2514 	if (++curr_txd == adapter->num_tx_desc)
2515 		curr_txd = 0;
2516 
2517 	adapter->num_tx_desc_avail--;
2518 	adapter->next_avail_tx_desc = curr_txd;
2519 }
2520 
2521 /**********************************************************************
2522  *
2523  *  Examine each tx_buffer in the used queue. If the hardware is done
2524  *  processing the packet then free associated resources. The
2525  *  tx_buffer is put back on the free queue.
2526  *
2527  **********************************************************************/
2528 
2529 static void
2530 em_clean_transmit_interrupts(struct adapter *adapter)
2531 {
2532 	int i, num_avail;
2533 	struct em_buffer *tx_buffer;
2534 	struct em_tx_desc *tx_desc;
2535 	struct ifnet *ifp = &adapter->interface_data.ac_if;
2536 
2537 	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2538 		return;
2539 
2540 	num_avail = adapter->num_tx_desc_avail;
2541 	i = adapter->oldest_used_tx_desc;
2542 
2543 	tx_buffer = &adapter->tx_buffer_area[i];
2544 	tx_desc = &adapter->tx_desc_base[i];
2545 
2546 	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2547 			BUS_DMASYNC_POSTREAD);
2548 
2549 	while(tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2550 		tx_desc->upper.data = 0;
2551 		num_avail++;
2552 
2553 		logif(pkt_txclean);
2554 
2555 		if (tx_buffer->m_head) {
2556 			ifp->if_opackets++;
2557 			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2558 					BUS_DMASYNC_POSTWRITE);
2559 			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2560 
2561 			m_freem(tx_buffer->m_head);
2562 			tx_buffer->m_head = NULL;
2563 		}
2564 
2565 		if (++i == adapter->num_tx_desc)
2566 			i = 0;
2567 
2568 		tx_buffer = &adapter->tx_buffer_area[i];
2569 		tx_desc = &adapter->tx_desc_base[i];
2570 	}
2571 
2572 	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2573 			BUS_DMASYNC_PREWRITE);
2574 
2575 	adapter->oldest_used_tx_desc = i;
2576 
2577 	/*
2578 	 * If we have enough room, clear IFF_OACTIVE to tell the stack
2579 	 * that it is OK to send packets.
2580 	 * If there are no pending descriptors, clear the timeout. Otherwise,
2581 	 * if some descriptors have been freed, restart the timeout.
2582 	 */
2583 	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
2584 		ifp->if_flags &= ~IFF_OACTIVE;
2585 		if (num_avail == adapter->num_tx_desc)
2586 			ifp->if_timer = 0;
2587 		else if (num_avail == adapter->num_tx_desc_avail)
2588 			ifp->if_timer = EM_TX_TIMEOUT;
2589 	}
2590 	adapter->num_tx_desc_avail = num_avail;
2591 }
2592 
2593 /*********************************************************************
2594  *
2595  *  Get a buffer from system mbuf buffer pool.
2596  *
2597  **********************************************************************/
2598 static int
2599 em_get_buf(int i, struct adapter *adapter, struct mbuf *nmp, int how)
2600 {
2601 	struct mbuf *mp = nmp;
2602 	struct em_buffer *rx_buffer;
2603 	struct ifnet *ifp;
2604 	bus_addr_t paddr;
2605 	int error;
2606 
2607 	ifp = &adapter->interface_data.ac_if;
2608 
2609 	if (mp == NULL) {
2610 		mp = m_getcl(how, MT_DATA, M_PKTHDR);
2611 		if (mp == NULL) {
2612 			adapter->mbuf_cluster_failed++;
2613 			return(ENOBUFS);
2614 		}
2615 		mp->m_len = mp->m_pkthdr.len = MCLBYTES;
2616 	} else {
2617 		mp->m_len = mp->m_pkthdr.len = MCLBYTES;
2618 		mp->m_data = mp->m_ext.ext_buf;
2619 		mp->m_next = NULL;
2620 	}
2621 	if (ifp->if_mtu <= ETHERMTU)
2622 		m_adj(mp, ETHER_ALIGN);
2623 
2624 	rx_buffer = &adapter->rx_buffer_area[i];
2625 
2626 	/*
2627 	 * Using memory from the mbuf cluster pool, invoke the
2628 	 * bus_dma machinery to arrange the memory mapping.
2629 	 */
2630 	error = bus_dmamap_load(adapter->rxtag, rx_buffer->map,
2631 				mtod(mp, void *), mp->m_len,
2632 				em_dmamap_cb, &paddr, 0);
2633 	if (error) {
2634 		m_free(mp);
2635 		return(error);
2636 	}
2637 	rx_buffer->m_head = mp;
2638 	adapter->rx_desc_base[i].buffer_addr = htole64(paddr);
2639 	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
2640 
2641 	return(0);
2642 }
2643 
2644 /*********************************************************************
2645  *
2646  *  Allocate memory for rx_buffer structures. Since we use one
2647  *  rx_buffer per received packet, the maximum number of rx_buffer's
2648  *  that we'll need is equal to the number of receive descriptors
2649  *  that we've allocated.
2650  *
2651  **********************************************************************/
2652 static int
2653 em_allocate_receive_structures(struct adapter *adapter)
2654 {
2655 	int i, error, size;
2656 	struct em_buffer *rx_buffer;
2657 
2658 	size = adapter->num_rx_desc * sizeof(struct em_buffer);
2659 	adapter->rx_buffer_area = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
2660 
2661 	error = bus_dma_tag_create(NULL,		/* parent */
2662 				   1, 0,		/* alignment, bounds */
2663 				   BUS_SPACE_MAXADDR,	/* lowaddr */
2664 				   BUS_SPACE_MAXADDR,	/* highaddr */
2665 				   NULL, NULL,		/* filter, filterarg */
2666 				   MCLBYTES,		/* maxsize */
2667 				   1,			/* nsegments */
2668 				   MCLBYTES,		/* maxsegsize */
2669 				   BUS_DMA_ALLOCNOW,	/* flags */
2670 				   &adapter->rxtag);
2671 	if (error != 0) {
2672 		device_printf(adapter->dev, "em_allocate_receive_structures: "
2673 			      "bus_dma_tag_create failed; error %u\n", error);
2674 		goto fail_0;
2675 	}
2676 
2677 	rx_buffer = adapter->rx_buffer_area;
2678 	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
2679 		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
2680 					  &rx_buffer->map);
2681 		if (error != 0) {
2682 			device_printf(adapter->dev,
2683 				      "em_allocate_receive_structures: "
2684 				      "bus_dmamap_create failed; error %u\n",
2685 				      error);
2686 			goto fail_1;
2687 		}
2688 	}
2689 
2690 	for (i = 0; i < adapter->num_rx_desc; i++) {
2691 		error = em_get_buf(i, adapter, NULL, MB_WAIT);
2692 		if (error != 0) {
2693 			adapter->rx_buffer_area[i].m_head = NULL;
2694 			adapter->rx_desc_base[i].buffer_addr = 0;
2695 			return(error);
2696 		}
2697 	}
2698 
2699 	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
2700 			BUS_DMASYNC_PREWRITE);
2701 
2702 	return(0);
2703 
2704 fail_1:
2705 	bus_dma_tag_destroy(adapter->rxtag);
2706 fail_0:
2707 	adapter->rxtag = NULL;
2708 	kfree(adapter->rx_buffer_area, M_DEVBUF);
2709 	adapter->rx_buffer_area = NULL;
2710 	return(error);
2711 }
2712 
2713 /*********************************************************************
2714  *
2715  *  Allocate and initialize receive structures.
2716  *
2717  **********************************************************************/
2718 static int
2719 em_setup_receive_structures(struct adapter *adapter)
2720 {
2721 	bzero((void *) adapter->rx_desc_base,
2722 	      (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
2723 
2724 	if (em_allocate_receive_structures(adapter))
2725 		return(ENOMEM);
2726 
2727 	/* Setup our descriptor pointers */
2728 	adapter->next_rx_desc_to_check = 0;
2729 	return(0);
2730 }
2731 
2732 /*********************************************************************
2733  *
2734  *  Enable receive unit.
2735  *
2736  **********************************************************************/
2737 static void
2738 em_initialize_receive_unit(struct adapter *adapter)
2739 {
2740 	uint32_t reg_rctl;
2741 	uint32_t reg_rxcsum;
2742 	struct ifnet *ifp;
2743 	uint64_t bus_addr;
2744 
2745 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2746 
2747 	ifp = &adapter->interface_data.ac_if;
2748 
2749 	/* Make sure receives are disabled while setting up the descriptor ring */
2750 	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
2751 
2752 	/* Set the Receive Delay Timer Register */
2753 	E1000_WRITE_REG(&adapter->hw, RDTR,
2754 			adapter->rx_int_delay.value | E1000_RDT_FPDB);
2755 
2756 	if(adapter->hw.mac_type >= em_82540) {
2757 		E1000_WRITE_REG(&adapter->hw, RADV,
2758 				adapter->rx_abs_int_delay.value);
2759 
2760 		/* Set the interrupt throttling rate in 256ns increments */
2761 		if (em_int_throttle_ceil) {
2762 			E1000_WRITE_REG(&adapter->hw, ITR,
2763 				1000000000 / 256 / em_int_throttle_ceil);
2764 		} else {
2765 			E1000_WRITE_REG(&adapter->hw, ITR, 0);
2766 		}
2767 	}
2768 
2769 	/* Setup the Base and Length of the Rx Descriptor Ring */
2770 	bus_addr = adapter->rxdma.dma_paddr;
2771 	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
2772 			sizeof(struct em_rx_desc));
2773 	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
2774 	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
2775 
2776 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2777 	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
2778 	E1000_WRITE_REG(&adapter->hw, RDH, 0);
2779 
2780 	/* Setup the Receive Control Register */
2781 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2782 		   E1000_RCTL_RDMTS_HALF |
2783 		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2784 
2785 	if (adapter->hw.tbi_compatibility_on == TRUE)
2786 		reg_rctl |= E1000_RCTL_SBP;
2787 
2788 	switch (adapter->rx_buffer_len) {
2789 	default:
2790 	case EM_RXBUFFER_2048:
2791 		reg_rctl |= E1000_RCTL_SZ_2048;
2792 		break;
2793 	case EM_RXBUFFER_4096:
2794 		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
2795 		break;
2796 	case EM_RXBUFFER_8192:
2797 		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
2798 		break;
2799 	case EM_RXBUFFER_16384:
2800 		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
2801 		break;
2802 	}
2803 
2804 	if (ifp->if_mtu > ETHERMTU)
2805 		reg_rctl |= E1000_RCTL_LPE;
2806 
2807 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2808 	if ((adapter->hw.mac_type >= em_82543) &&
2809 	    (ifp->if_capenable & IFCAP_RXCSUM)) {
2810 		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
2811 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2812 		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
2813 	}
2814 
2815 	/* Enable Receives */
2816 	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
2817 }
2818 
2819 /*********************************************************************
2820  *
2821  *  Free receive related data structures.
2822  *
2823  **********************************************************************/
2824 static void
2825 em_free_receive_structures(struct adapter *adapter)
2826 {
2827 	struct em_buffer *rx_buffer;
2828 	int i;
2829 
2830 	INIT_DEBUGOUT("free_receive_structures: begin");
2831 
2832 	if (adapter->rx_buffer_area != NULL) {
2833 		rx_buffer = adapter->rx_buffer_area;
2834 		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
2835 			if (rx_buffer->map != NULL) {
2836 				bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
2837 				bus_dmamap_destroy(adapter->rxtag, rx_buffer->map);
2838 			}
2839 			if (rx_buffer->m_head != NULL)
2840 				m_freem(rx_buffer->m_head);
2841 			rx_buffer->m_head = NULL;
2842 		}
2843 	}
2844 	if (adapter->rx_buffer_area != NULL) {
2845 		kfree(adapter->rx_buffer_area, M_DEVBUF);
2846 		adapter->rx_buffer_area = NULL;
2847 	}
2848 	if (adapter->rxtag != NULL) {
2849 		bus_dma_tag_destroy(adapter->rxtag);
2850 		adapter->rxtag = NULL;
2851 	}
2852 }
2853 
2854 /*********************************************************************
2855  *
2856  *  This routine executes in interrupt context. It replenishes
2857  *  the mbufs in the descriptor and sends data which has been
2858  *  dma'ed into host memory to upper layer.
2859  *
2860  *  We loop at most count times if count is > 0, or until done if
2861  *  count < 0.
2862  *
2863  *********************************************************************/
2864 static void
2865 em_process_receive_interrupts(struct adapter *adapter, int count)
2866 {
2867 	struct ifnet *ifp;
2868 	struct mbuf *mp;
2869 	uint8_t accept_frame = 0;
2870 	uint8_t eop = 0;
2871 	uint16_t len, desc_len, prev_len_adj;
2872 	int i;
2873 
2874 	/* Pointer to the receive descriptor being examined. */
2875 	struct em_rx_desc *current_desc;
2876 
2877 	ifp = &adapter->interface_data.ac_if;
2878 	i = adapter->next_rx_desc_to_check;
2879 	current_desc = &adapter->rx_desc_base[i];
2880 
2881 	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
2882 			BUS_DMASYNC_POSTREAD);
2883 
2884 	if (!((current_desc->status) & E1000_RXD_STAT_DD))
2885 		return;
2886 
2887 	while ((current_desc->status & E1000_RXD_STAT_DD) && (count != 0)) {
2888 		logif(pkt_receive);
2889 		mp = adapter->rx_buffer_area[i].m_head;
2890 		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
2891 				BUS_DMASYNC_POSTREAD);
2892 		bus_dmamap_unload(adapter->rxtag,
2893 				  adapter->rx_buffer_area[i].map);
2894 
2895 		accept_frame = 1;
2896 		prev_len_adj = 0;
2897 		desc_len = le16toh(current_desc->length);
2898 		if (current_desc->status & E1000_RXD_STAT_EOP) {
2899 			count--;
2900 			eop = 1;
2901 			if (desc_len < ETHER_CRC_LEN) {
2902 				len = 0;
2903 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2904 			} else {
2905 				len = desc_len - ETHER_CRC_LEN;
2906 			}
2907 		} else {
2908 			eop = 0;
2909 			len = desc_len;
2910 		}
2911 
2912 		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2913 			uint8_t last_byte;
2914 			uint32_t pkt_len = desc_len;
2915 
2916 			if (adapter->fmp != NULL)
2917 				pkt_len += adapter->fmp->m_pkthdr.len;
2918 
2919 			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
2920 
2921 			if (TBI_ACCEPT(&adapter->hw, current_desc->status,
2922 				       current_desc->errors,
2923 				       pkt_len, last_byte)) {
2924 				em_tbi_adjust_stats(&adapter->hw,
2925 						    &adapter->stats,
2926 						    pkt_len,
2927 						    adapter->hw.mac_addr);
2928 				if (len > 0)
2929 					len--;
2930 			} else {
2931 				accept_frame = 0;
2932 			}
2933 		}
2934 
2935 		if (accept_frame) {
2936 			if (em_get_buf(i, adapter, NULL, MB_DONTWAIT) == ENOBUFS) {
2937 				adapter->dropped_pkts++;
2938 				em_get_buf(i, adapter, mp, MB_DONTWAIT);
2939 				if (adapter->fmp != NULL)
2940 					m_freem(adapter->fmp);
2941 				adapter->fmp = NULL;
2942 				adapter->lmp = NULL;
2943 				break;
2944 			}
2945 
2946 			/* Assign correct length to the current fragment */
2947 			mp->m_len = len;
2948 
2949 			if (adapter->fmp == NULL) {
2950 				mp->m_pkthdr.len = len;
2951 				adapter->fmp = mp;	 /* Store the first mbuf */
2952 				adapter->lmp = mp;
2953 			} else {
2954 				/* Chain mbuf's together */
2955 				/*
2956 				 * Adjust length of previous mbuf in chain if we
2957 				 * received less than 4 bytes in the last descriptor.
2958 				 */
2959 				if (prev_len_adj > 0) {
2960 					adapter->lmp->m_len -= prev_len_adj;
2961 					adapter->fmp->m_pkthdr.len -= prev_len_adj;
2962 				}
2963 				adapter->lmp->m_next = mp;
2964 				adapter->lmp = adapter->lmp->m_next;
2965 				adapter->fmp->m_pkthdr.len += len;
2966 			}
2967 
2968 			if (eop) {
2969 				adapter->fmp->m_pkthdr.rcvif = ifp;
2970 				ifp->if_ipackets++;
2971 
2972 				em_receive_checksum(adapter, current_desc,
2973 						    adapter->fmp);
2974 				if (current_desc->status & E1000_RXD_STAT_VP) {
2975 					VLAN_INPUT_TAG(adapter->fmp,
2976 						       (current_desc->special &
2977 							E1000_RXD_SPC_VLAN_MASK));
2978 				} else {
2979 					ifp->if_input(ifp, adapter->fmp);
2980 				}
2981 				adapter->fmp = NULL;
2982 				adapter->lmp = NULL;
2983 			}
2984 		} else {
2985 			adapter->dropped_pkts++;
2986 			em_get_buf(i, adapter, mp, MB_DONTWAIT);
2987 			if (adapter->fmp != NULL)
2988 				m_freem(adapter->fmp);
2989 			adapter->fmp = NULL;
2990 			adapter->lmp = NULL;
2991 		}
2992 
2993 		/* Zero out the receive descriptors status  */
2994 		current_desc->status = 0;
2995 
2996 		/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
2997 		E1000_WRITE_REG(&adapter->hw, RDT, i);
2998 
2999 		/* Advance our pointers to the next descriptor */
3000 		if (++i == adapter->num_rx_desc) {
3001 			i = 0;
3002 			current_desc = adapter->rx_desc_base;
3003 		} else {
3004 			current_desc++;
3005 		}
3006 	}
3007 
3008 	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3009 			BUS_DMASYNC_PREWRITE);
3010 
3011 	adapter->next_rx_desc_to_check = i;
3012 }
3013 
3014 /*********************************************************************
3015  *
3016  *  Verify that the hardware indicated that the checksum is valid.
3017  *  Inform the stack about the status of checksum so that stack
3018  *  doesn't spend time verifying the checksum.
3019  *
3020  *********************************************************************/
3021 static void
3022 em_receive_checksum(struct adapter *adapter,
3023 		    struct em_rx_desc *rx_desc,
3024 		    struct mbuf *mp)
3025 {
3026 	/* 82543 or newer only */
3027 	if ((adapter->hw.mac_type < em_82543) ||
3028 	    /* Ignore Checksum bit is set */
3029 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3030 		mp->m_pkthdr.csum_flags = 0;
3031 		return;
3032 	}
3033 
3034 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3035 		/* Did it pass? */
3036 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3037 			/* IP Checksum Good */
3038 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3039 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3040 		} else {
3041 			mp->m_pkthdr.csum_flags = 0;
3042 		}
3043 	}
3044 
3045 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3046 		/* Did it pass? */
3047 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3048 			mp->m_pkthdr.csum_flags |=
3049 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3050 			mp->m_pkthdr.csum_data = htons(0xffff);
3051 		}
3052 	}
3053 }
3054 
3055 
3056 static void
3057 em_enable_vlans(struct adapter *adapter)
3058 {
3059 	uint32_t ctrl;
3060 
3061 	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3062 
3063 	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3064 	ctrl |= E1000_CTRL_VME;
3065 	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3066 }
3067 
3068 static void
3069 em_disable_vlans(struct adapter *adapter)
3070 {
3071         uint32_t ctrl;
3072 
3073 	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3074 	ctrl &= ~E1000_CTRL_VME;
3075 	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3076 }
3077 
3078 /*
3079  * note: we must call bus_enable_intr() prior to enabling the hardware
3080  * interrupt and bus_disable_intr() after disabling the hardware interrupt
3081  * in order to avoid handler execution races from scheduled interrupt
3082  * threads.
3083  */
3084 static void
3085 em_enable_intr(struct adapter *adapter)
3086 {
3087 	struct ifnet *ifp = &adapter->interface_data.ac_if;
3088 
3089 	if ((ifp->if_flags & IFF_POLLING) == 0) {
3090 		lwkt_serialize_handler_enable(ifp->if_serializer);
3091 		E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3092 	}
3093 }
3094 
3095 static void
3096 em_disable_intr(struct adapter *adapter)
3097 {
3098 	/*
3099 	 * The first version of 82542 had an errata where when link was
3100 	 * forced it would stay up even up even if the cable was disconnected.
3101 	 * Sequence errors were used to detect the disconnect and then the
3102 	 * driver would unforce the link.  This code in the in the ISR.  For
3103 	 * this to work correctly the Sequence error interrupt had to be
3104 	 * enabled all the time.
3105 	 */
3106 	if (adapter->hw.mac_type == em_82542_rev2_0) {
3107 		E1000_WRITE_REG(&adapter->hw, IMC,
3108 				(0xffffffff & ~E1000_IMC_RXSEQ));
3109 	} else {
3110 		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
3111 	}
3112 
3113 	lwkt_serialize_handler_disable(adapter->interface_data.ac_if.if_serializer);
3114 }
3115 
3116 static int
3117 em_is_valid_ether_addr(uint8_t *addr)
3118 {
3119 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3120 
3121 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN)))
3122 		return(FALSE);
3123 	else
3124 		return(TRUE);
3125 }
3126 
3127 void
3128 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3129 {
3130 	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3131 }
3132 
3133 void
3134 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3135 {
3136 	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3137 }
3138 
3139 void
3140 em_pci_set_mwi(struct em_hw *hw)
3141 {
3142 	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3143 			 (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3144 }
3145 
3146 void
3147 em_pci_clear_mwi(struct em_hw *hw)
3148 {
3149 	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3150 			 (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3151 }
3152 
3153 uint32_t
3154 em_read_reg_io(struct em_hw *hw, uint32_t offset)
3155 {
3156 	bus_space_write_4(hw->reg_io_tag, hw->reg_io_handle, 0, offset);
3157 	return(bus_space_read_4(hw->reg_io_tag, hw->reg_io_handle, 4));
3158 }
3159 
3160 void
3161 em_write_reg_io(struct em_hw *hw, uint32_t offset, uint32_t value)
3162 {
3163 	bus_space_write_4(hw->reg_io_tag, hw->reg_io_handle, 0, offset);
3164 	bus_space_write_4(hw->reg_io_tag, hw->reg_io_handle, 4, value);
3165 }
3166 
3167 /*********************************************************************
3168  * 82544 Coexistence issue workaround.
3169  *    There are 2 issues.
3170  *	1. Transmit Hang issue.
3171  *    To detect this issue, following equation can be used...
3172  *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3173  *          If SUM[3:0] is in between 1 to 4, we will have this issue.
3174  *
3175  *	2. DAC issue.
3176  *    To detect this issue, following equation can be used...
3177  *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3178  *          If SUM[3:0] is in between 9 to c, we will have this issue.
3179  *
3180  *
3181  *    WORKAROUND:
3182  *          Make sure we do not have ending address as 1,2,3,4(Hang) or
3183  *          9,a,b,c (DAC)
3184  *
3185 *************************************************************************/
3186 static uint32_t
3187 em_fill_descriptors(uint64_t address, uint32_t length, PDESC_ARRAY desc_array)
3188 {
3189 	/* Since issue is sensitive to length and address.*/
3190 	/* Let us first check the address...*/
3191 	uint32_t safe_terminator;
3192 	if (length <= 4) {
3193 		desc_array->descriptor[0].address = address;
3194 		desc_array->descriptor[0].length = length;
3195 		desc_array->elements = 1;
3196 		return(desc_array->elements);
3197 	}
3198 	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3199 	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3200 	if (safe_terminator == 0 ||
3201 	    (safe_terminator > 4 && safe_terminator < 9) ||
3202 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
3203 		desc_array->descriptor[0].address = address;
3204 		desc_array->descriptor[0].length = length;
3205 		desc_array->elements = 1;
3206 		return(desc_array->elements);
3207 	}
3208 
3209 	desc_array->descriptor[0].address = address;
3210 	desc_array->descriptor[0].length = length - 4;
3211 	desc_array->descriptor[1].address = address + (length - 4);
3212 	desc_array->descriptor[1].length = 4;
3213 	desc_array->elements = 2;
3214 	return(desc_array->elements);
3215 }
3216 
3217 /**********************************************************************
3218  *
3219  *  Update the board statistics counters.
3220  *
3221  **********************************************************************/
3222 static void
3223 em_update_stats_counters(struct adapter *adapter)
3224 {
3225 	struct ifnet   *ifp;
3226 
3227 	if (adapter->hw.media_type == em_media_type_copper ||
3228 	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3229 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3230 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3231 	}
3232 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3233 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3234 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3235 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3236 
3237 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3238 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3239 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3240 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3241 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3242 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3243 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3244 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3245 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3246 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3247 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3248 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3249 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3250 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3251 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3252 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3253 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3254 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3255 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3256 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3257 
3258 	/* For the 64-bit byte counters the low dword must be read first. */
3259 	/* Both registers clear on the read of the high dword */
3260 
3261 	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3262 	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3263 	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3264 	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3265 
3266 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3267 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3268 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3269 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3270 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3271 
3272 	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3273 	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3274 	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3275 	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3276 
3277 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3278 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3279 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3280 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3281 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3282 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3283 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3284 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3285 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3286 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3287 
3288 	if (adapter->hw.mac_type >= em_82543) {
3289 		adapter->stats.algnerrc +=
3290 		    E1000_READ_REG(&adapter->hw, ALGNERRC);
3291 		adapter->stats.rxerrc +=
3292 		    E1000_READ_REG(&adapter->hw, RXERRC);
3293 		adapter->stats.tncrs +=
3294 		    E1000_READ_REG(&adapter->hw, TNCRS);
3295 		adapter->stats.cexterr +=
3296 		    E1000_READ_REG(&adapter->hw, CEXTERR);
3297 		adapter->stats.tsctc +=
3298 		    E1000_READ_REG(&adapter->hw, TSCTC);
3299 		adapter->stats.tsctfc +=
3300 		    E1000_READ_REG(&adapter->hw, TSCTFC);
3301 	}
3302 	ifp = &adapter->interface_data.ac_if;
3303 
3304 	/* Fill out the OS statistics structure */
3305 	ifp->if_collisions = adapter->stats.colc;
3306 
3307 	/* Rx Errors */
3308 	ifp->if_ierrors =
3309 		adapter->dropped_pkts +
3310 		adapter->stats.rxerrc +
3311 		adapter->stats.crcerrs +
3312 		adapter->stats.algnerrc +
3313 		adapter->stats.ruc + adapter->stats.roc +
3314 		adapter->stats.mpc + adapter->stats.cexterr +
3315 		adapter->rx_overruns;
3316 
3317 	/* Tx Errors */
3318 	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3319 			  adapter->watchdog_timeouts;
3320 }
3321 
3322 
3323 /**********************************************************************
3324  *
3325  *  This routine is called only when em_display_debug_stats is enabled.
3326  *  This routine provides a way to take a look at important statistics
3327  *  maintained by the driver and hardware.
3328  *
3329  **********************************************************************/
3330 static void
3331 em_print_debug_info(struct adapter *adapter)
3332 {
3333 	device_t dev= adapter->dev;
3334 	uint8_t *hw_addr = adapter->hw.hw_addr;
3335 
3336 	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3337 	device_printf(dev, "CTRL  = 0x%x\n",
3338 		      E1000_READ_REG(&adapter->hw, CTRL));
3339 	device_printf(dev, "RCTL  = 0x%x PS=(0x8402)\n",
3340 		      E1000_READ_REG(&adapter->hw, RCTL));
3341 	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
3342 		      ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3343 		      (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3344 	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3345 		      adapter->hw.fc_high_water, adapter->hw.fc_low_water);
3346 	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3347 		      E1000_READ_REG(&adapter->hw, TIDV),
3348 		      E1000_READ_REG(&adapter->hw, TADV));
3349 	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3350 		      E1000_READ_REG(&adapter->hw, RDTR),
3351 		      E1000_READ_REG(&adapter->hw, RADV));
3352 	device_printf(dev, "fifo workaround = %lld, fifo_reset = %lld\n",
3353 		      (long long)adapter->tx_fifo_wrk_cnt,
3354 		      (long long)adapter->tx_fifo_reset_cnt);
3355 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3356 		      E1000_READ_REG(&adapter->hw, TDH),
3357 		      E1000_READ_REG(&adapter->hw, TDT));
3358 	device_printf(dev, "Num Tx descriptors avail = %d\n",
3359 		      adapter->num_tx_desc_avail);
3360 	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3361 		      adapter->no_tx_desc_avail1);
3362 	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3363 		      adapter->no_tx_desc_avail2);
3364 	device_printf(dev, "Std mbuf failed = %ld\n",
3365 		      adapter->mbuf_alloc_failed);
3366 	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3367 		      adapter->mbuf_cluster_failed);
3368 	device_printf(dev, "Driver dropped packets = %ld\n",
3369 		      adapter->dropped_pkts);
3370 }
3371 
3372 static void
3373 em_print_hw_stats(struct adapter *adapter)
3374 {
3375 	device_t dev= adapter->dev;
3376 
3377 	device_printf(dev, "Adapter: %p\n", adapter);
3378 
3379 	device_printf(dev, "Excessive collisions = %lld\n",
3380 		      (long long)adapter->stats.ecol);
3381 	device_printf(dev, "Symbol errors = %lld\n",
3382 		      (long long)adapter->stats.symerrs);
3383 	device_printf(dev, "Sequence errors = %lld\n",
3384 		      (long long)adapter->stats.sec);
3385 	device_printf(dev, "Defer count = %lld\n",
3386 		      (long long)adapter->stats.dc);
3387 
3388 	device_printf(dev, "Missed Packets = %lld\n",
3389 		      (long long)adapter->stats.mpc);
3390 	device_printf(dev, "Receive No Buffers = %lld\n",
3391 		      (long long)adapter->stats.rnbc);
3392 	device_printf(dev, "Receive length errors = %lld\n",
3393 		      (long long)adapter->stats.rlec);
3394 	device_printf(dev, "Receive errors = %lld\n",
3395 		      (long long)adapter->stats.rxerrc);
3396 	device_printf(dev, "Crc errors = %lld\n",
3397 		      (long long)adapter->stats.crcerrs);
3398 	device_printf(dev, "Alignment errors = %lld\n",
3399 		      (long long)adapter->stats.algnerrc);
3400 	device_printf(dev, "Carrier extension errors = %lld\n",
3401 		      (long long)adapter->stats.cexterr);
3402 	device_printf(dev, "RX overruns = %lu\n", adapter->rx_overruns);
3403 	device_printf(dev, "Watchdog timeouts = %lu\n",
3404 		      adapter->watchdog_timeouts);
3405 
3406 	device_printf(dev, "XON Rcvd = %lld\n",
3407 		      (long long)adapter->stats.xonrxc);
3408 	device_printf(dev, "XON Xmtd = %lld\n",
3409 		      (long long)adapter->stats.xontxc);
3410 	device_printf(dev, "XOFF Rcvd = %lld\n",
3411 		      (long long)adapter->stats.xoffrxc);
3412 	device_printf(dev, "XOFF Xmtd = %lld\n",
3413 		      (long long)adapter->stats.xofftxc);
3414 
3415 	device_printf(dev, "Good Packets Rcvd = %lld\n",
3416 		      (long long)adapter->stats.gprc);
3417 	device_printf(dev, "Good Packets Xmtd = %lld\n",
3418 		      (long long)adapter->stats.gptc);
3419 }
3420 
3421 static int
3422 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3423 {
3424 	int error;
3425 	int result;
3426 	struct adapter *adapter;
3427 
3428 	result = -1;
3429 	error = sysctl_handle_int(oidp, &result, 0, req);
3430 
3431 	if (error || !req->newptr)
3432 		return(error);
3433 
3434 	if (result == 1) {
3435 		adapter = (struct adapter *)arg1;
3436 		em_print_debug_info(adapter);
3437 	}
3438 
3439 	return(error);
3440 }
3441 
3442 static int
3443 em_sysctl_stats(SYSCTL_HANDLER_ARGS)
3444 {
3445 	int error;
3446 	int result;
3447 	struct adapter *adapter;
3448 
3449 	result = -1;
3450 	error = sysctl_handle_int(oidp, &result, 0, req);
3451 
3452 	if (error || !req->newptr)
3453 		return(error);
3454 
3455 	if (result == 1) {
3456 		adapter = (struct adapter *)arg1;
3457 		em_print_hw_stats(adapter);
3458 	}
3459 
3460 	return(error);
3461 }
3462 
3463 static int
3464 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
3465 {
3466 	struct em_int_delay_info *info;
3467 	struct adapter *adapter;
3468 	uint32_t regval;
3469 	int error;
3470 	int usecs;
3471 	int ticks;
3472 
3473 	info = (struct em_int_delay_info *)arg1;
3474 	adapter = info->adapter;
3475 	usecs = info->value;
3476 	error = sysctl_handle_int(oidp, &usecs, 0, req);
3477 	if (error != 0 || req->newptr == NULL)
3478 		return(error);
3479 	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
3480 		return(EINVAL);
3481 	info->value = usecs;
3482 	ticks = E1000_USECS_TO_TICKS(usecs);
3483 
3484 	lwkt_serialize_enter(adapter->interface_data.ac_if.if_serializer);
3485 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
3486 	regval = (regval & ~0xffff) | (ticks & 0xffff);
3487 	/* Handle a few special cases. */
3488 	switch (info->offset) {
3489 	case E1000_RDTR:
3490 	case E1000_82542_RDTR:
3491 		regval |= E1000_RDT_FPDB;
3492 		break;
3493 	case E1000_TIDV:
3494 	case E1000_82542_TIDV:
3495 		if (ticks == 0) {
3496 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
3497 			/* Don't write 0 into the TIDV register. */
3498 			regval++;
3499 		} else
3500 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3501 		break;
3502 	}
3503 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
3504 	lwkt_serialize_exit(adapter->interface_data.ac_if.if_serializer);
3505 	return(0);
3506 }
3507 
3508 static void
3509 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
3510 			const char *description, struct em_int_delay_info *info,
3511 			int offset, int value)
3512 {
3513 	info->adapter = adapter;
3514 	info->offset = offset;
3515 	info->value = value;
3516 	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
3517 			SYSCTL_CHILDREN(adapter->sysctl_tree),
3518 			OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
3519 			info, 0, em_sysctl_int_delay, "I", description);
3520 }
3521 
3522 static int
3523 em_sysctl_int_throttle(SYSCTL_HANDLER_ARGS)
3524 {
3525 	struct adapter *adapter = (void *)arg1;
3526 	int error;
3527 	int throttle;
3528 
3529 	throttle = em_int_throttle_ceil;
3530 	error = sysctl_handle_int(oidp, &throttle, 0, req);
3531 	if (error || req->newptr == NULL)
3532 		return error;
3533 	if (throttle < 0 || throttle > 1000000000 / 256)
3534 		return EINVAL;
3535 	if (throttle) {
3536 		/*
3537 		 * Set the interrupt throttling rate in 256ns increments,
3538 		 * recalculate sysctl value assignment to get exact frequency.
3539 		 */
3540 		throttle = 1000000000 / 256 / throttle;
3541 		lwkt_serialize_enter(adapter->interface_data.ac_if.if_serializer);
3542 		em_int_throttle_ceil = 1000000000 / 256 / throttle;
3543 		E1000_WRITE_REG(&adapter->hw, ITR, throttle);
3544 		lwkt_serialize_exit(adapter->interface_data.ac_if.if_serializer);
3545 	} else {
3546 		lwkt_serialize_enter(adapter->interface_data.ac_if.if_serializer);
3547 		em_int_throttle_ceil = 0;
3548 		E1000_WRITE_REG(&adapter->hw, ITR, 0);
3549 		lwkt_serialize_exit(adapter->interface_data.ac_if.if_serializer);
3550 	}
3551 	device_printf(adapter->dev, "Interrupt moderation set to %d/sec\n",
3552 			em_int_throttle_ceil);
3553 	return 0;
3554 }
3555 
3556