xref: /openbsd-src/sys/dev/pci/if_em.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.356 2020/07/12 05:21:34 dlg Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 /*********************************************************************
41  *  Driver version
42  *********************************************************************/
43 
44 #define EM_DRIVER_VERSION	"6.2.9"
45 
46 /*********************************************************************
47  *  PCI Device ID Table
48  *********************************************************************/
49 const struct pci_matchid em_devices[] = {
50 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
225 };
226 
227 /*********************************************************************
228  *  Function prototypes
229  *********************************************************************/
230 int  em_probe(struct device *, void *, void *);
231 void em_attach(struct device *, struct device *, void *);
232 void em_defer_attach(struct device*);
233 int  em_detach(struct device *, int);
234 int  em_activate(struct device *, int);
235 int  em_intr(void *);
236 int  em_allocate_legacy(struct em_softc *);
237 void em_start(struct ifqueue *);
238 int  em_ioctl(struct ifnet *, u_long, caddr_t);
239 void em_watchdog(struct ifnet *);
240 void em_init(void *);
241 void em_stop(void *, int);
242 void em_media_status(struct ifnet *, struct ifmediareq *);
243 int  em_media_change(struct ifnet *);
244 uint64_t  em_flowstatus(struct em_softc *);
245 void em_identify_hardware(struct em_softc *);
246 int  em_allocate_pci_resources(struct em_softc *);
247 void em_free_pci_resources(struct em_softc *);
248 void em_local_timer(void *);
249 int  em_hardware_init(struct em_softc *);
250 void em_setup_interface(struct em_softc *);
251 int  em_setup_transmit_structures(struct em_softc *);
252 void em_initialize_transmit_unit(struct em_softc *);
253 int  em_setup_receive_structures(struct em_softc *);
254 void em_initialize_receive_unit(struct em_softc *);
255 void em_enable_intr(struct em_softc *);
256 void em_disable_intr(struct em_softc *);
257 void em_free_transmit_structures(struct em_softc *);
258 void em_free_receive_structures(struct em_softc *);
259 void em_update_stats_counters(struct em_softc *);
260 void em_disable_aspm(struct em_softc *);
261 void em_txeof(struct em_queue *);
262 int  em_allocate_receive_structures(struct em_softc *);
263 int  em_allocate_transmit_structures(struct em_softc *);
264 int  em_allocate_desc_rings(struct em_softc *);
265 int  em_rxfill(struct em_queue *);
266 void em_rxrefill(void *);
267 int  em_rxeof(struct em_queue *);
268 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
269 			 struct mbuf *);
270 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
271 	    u_int32_t *, u_int32_t *);
272 void em_iff(struct em_softc *);
273 void em_update_link_status(struct em_softc *);
274 int  em_get_buf(struct em_queue *, int);
275 void em_enable_hw_vlans(struct em_softc *);
276 u_int em_encap(struct em_queue *, struct mbuf *);
277 void em_smartspeed(struct em_softc *);
278 int  em_82547_fifo_workaround(struct em_softc *, int);
279 void em_82547_update_fifo_head(struct em_softc *, int);
280 int  em_82547_tx_fifo_reset(struct em_softc *);
281 void em_82547_move_tail(void *arg);
282 void em_82547_move_tail_locked(struct em_softc *);
283 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
284 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
285 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
286 			      PDESC_ARRAY desc_array);
287 void em_flush_tx_ring(struct em_queue *);
288 void em_flush_rx_ring(struct em_queue *);
289 void em_flush_desc_rings(struct em_softc *);
290 
291 #ifndef SMALL_KERNEL
292 /* MSIX/Multiqueue functions */
293 int  em_allocate_msix(struct em_softc *);
294 int  em_setup_queues_msix(struct em_softc *);
295 int  em_queue_intr_msix(void *);
296 int  em_link_intr_msix(void *);
297 void em_enable_queue_intr_msix(struct em_queue *);
298 #else
299 #define em_allocate_msix(_sc) 	(-1)
300 #endif
301 
302 #if NKSTAT > 0
303 void	em_kstat_attach(struct em_softc *);
304 int	em_kstat_read(struct kstat *);
305 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
306 #endif
307 
308 /*********************************************************************
309  *  OpenBSD Device Interface Entry Points
310  *********************************************************************/
311 
312 struct cfattach em_ca = {
313 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
314 	em_activate
315 };
316 
317 struct cfdriver em_cd = {
318 	NULL, "em", DV_IFNET
319 };
320 
321 static int em_smart_pwr_down = FALSE;
322 int em_enable_msix = 0;
323 
324 /*********************************************************************
325  *  Device identification routine
326  *
327  *  em_probe determines if the driver should be loaded on
328  *  adapter based on PCI vendor/device id of the adapter.
329  *
330  *  return 0 on no match, positive on match
331  *********************************************************************/
332 
333 int
334 em_probe(struct device *parent, void *match, void *aux)
335 {
336 	INIT_DEBUGOUT("em_probe: begin");
337 
338 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
339 	    nitems(em_devices)));
340 }
341 
342 void
343 em_defer_attach(struct device *self)
344 {
345 	struct em_softc *sc = (struct em_softc *)self;
346 	struct pci_attach_args *pa = &sc->osdep.em_pa;
347 	pci_chipset_tag_t	pc = pa->pa_pc;
348 	void *gcu;
349 
350 	INIT_DEBUGOUT("em_defer_attach: begin");
351 
352 	if ((gcu = em_lookup_gcu(self)) == 0) {
353 		printf("%s: No GCU found, defered attachment failed\n",
354 		    DEVNAME(sc));
355 
356 		if (sc->sc_intrhand)
357 			pci_intr_disestablish(pc, sc->sc_intrhand);
358 		sc->sc_intrhand = 0;
359 
360 		em_stop(sc, 1);
361 
362 		em_free_pci_resources(sc);
363 
364 		return;
365 	}
366 
367 	sc->hw.gcu = gcu;
368 
369 	em_attach_miibus(self);
370 
371 	em_setup_interface(sc);
372 
373 	em_setup_link(&sc->hw);
374 
375 	em_update_link_status(sc);
376 }
377 
378 /*********************************************************************
379  *  Device initialization routine
380  *
381  *  The attach entry point is called when the driver is being loaded.
382  *  This routine identifies the type of hardware, allocates all resources
383  *  and initializes the hardware.
384  *
385  *********************************************************************/
386 
387 void
388 em_attach(struct device *parent, struct device *self, void *aux)
389 {
390 	struct pci_attach_args *pa = aux;
391 	struct em_softc *sc;
392 	int defer = 0;
393 
394 	INIT_DEBUGOUT("em_attach: begin");
395 
396 	sc = (struct em_softc *)self;
397 	sc->sc_dmat = pa->pa_dmat;
398 	sc->osdep.em_pa = *pa;
399 
400 	timeout_set(&sc->timer_handle, em_local_timer, sc);
401 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
402 
403 	/* Determine hardware revision */
404 	em_identify_hardware(sc);
405 
406 	/*
407 	 * Only use MSI on the newer PCIe parts, with the exception
408 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
409 	 */
410 	if (sc->hw.mac_type <= em_82572)
411 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
412 
413 	/* Parameters (to be read from user) */
414 	if (sc->hw.mac_type >= em_82544) {
415 		sc->sc_tx_slots = EM_MAX_TXD;
416 		sc->sc_rx_slots = EM_MAX_RXD;
417 	} else {
418 		sc->sc_tx_slots = EM_MAX_TXD_82543;
419 		sc->sc_rx_slots = EM_MAX_RXD_82543;
420 	}
421 	sc->tx_int_delay = EM_TIDV;
422 	sc->tx_abs_int_delay = EM_TADV;
423 	sc->rx_int_delay = EM_RDTR;
424 	sc->rx_abs_int_delay = EM_RADV;
425 	sc->hw.autoneg = DO_AUTO_NEG;
426 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
427 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
428 	sc->hw.tbi_compatibility_en = TRUE;
429 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
430 
431 	sc->hw.phy_init_script = 1;
432 	sc->hw.phy_reset_disable = FALSE;
433 
434 #ifndef EM_MASTER_SLAVE
435 	sc->hw.master_slave = em_ms_hw_default;
436 #else
437 	sc->hw.master_slave = EM_MASTER_SLAVE;
438 #endif
439 
440 	/*
441 	 * This controls when hardware reports transmit completion
442 	 * status.
443 	 */
444 	sc->hw.report_tx_early = 1;
445 
446 	if (em_allocate_pci_resources(sc))
447 		goto err_pci;
448 
449 	/* Initialize eeprom parameters */
450 	em_init_eeprom_params(&sc->hw);
451 
452 	/*
453 	 * Set the max frame size assuming standard Ethernet
454 	 * sized frames.
455 	 */
456 	switch (sc->hw.mac_type) {
457 		case em_82573:
458 		{
459 			uint16_t	eeprom_data = 0;
460 
461 			/*
462 			 * 82573 only supports Jumbo frames
463 			 * if ASPM is disabled.
464 			 */
465 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
466 			    1, &eeprom_data);
467 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
468 				sc->hw.max_frame_size = ETHER_MAX_LEN;
469 				break;
470 			}
471 			/* Allow Jumbo frames */
472 			/* FALLTHROUGH */
473 		}
474 		case em_82571:
475 		case em_82572:
476 		case em_82574:
477 		case em_82575:
478 		case em_82576:
479 		case em_82580:
480 		case em_i210:
481 		case em_i350:
482 		case em_ich9lan:
483 		case em_ich10lan:
484 		case em_pch2lan:
485 		case em_pch_lpt:
486 		case em_pch_spt:
487 		case em_pch_cnp:
488 		case em_80003es2lan:
489 			/* 9K Jumbo Frame size */
490 			sc->hw.max_frame_size = 9234;
491 			break;
492 		case em_pchlan:
493 			sc->hw.max_frame_size = 4096;
494 			break;
495 		case em_82542_rev2_0:
496 		case em_82542_rev2_1:
497 		case em_ich8lan:
498 			/* Adapters that do not support Jumbo frames */
499 			sc->hw.max_frame_size = ETHER_MAX_LEN;
500 			break;
501 		default:
502 			sc->hw.max_frame_size =
503 			    MAX_JUMBO_FRAME_SIZE;
504 	}
505 
506 	sc->hw.min_frame_size =
507 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
508 
509 	if (em_allocate_desc_rings(sc) != 0) {
510 		printf("%s: Unable to allocate descriptor ring memory\n",
511 		    DEVNAME(sc));
512 		goto err_pci;
513 	}
514 
515 	/* Initialize the hardware */
516 	if ((defer = em_hardware_init(sc))) {
517 		if (defer == EAGAIN)
518 			config_defer(self, em_defer_attach);
519 		else {
520 			printf("%s: Unable to initialize the hardware\n",
521 			    DEVNAME(sc));
522 			goto err_pci;
523 		}
524 	}
525 
526 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
527 	    sc->hw.mac_type == em_82576 ||
528 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
529 	    sc->hw.mac_type == em_i350) {
530 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
531 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
532 		    E1000_STATUS_FUNC_SHIFT;
533 
534 		switch (sc->hw.bus_func) {
535 		case 0:
536 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
537 			break;
538 		case 1:
539 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
540 			break;
541 		case 2:
542 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
543 			break;
544 		case 3:
545 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
546 			break;
547 		}
548 	} else {
549 		sc->hw.bus_func = 0;
550 	}
551 
552 	/* Copy the permanent MAC address out of the EEPROM */
553 	if (em_read_mac_addr(&sc->hw) < 0) {
554 		printf("%s: EEPROM read error while reading mac address\n",
555 		       DEVNAME(sc));
556 		goto err_pci;
557 	}
558 
559 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
560 
561 	/* Setup OS specific network interface */
562 	if (!defer)
563 		em_setup_interface(sc);
564 
565 	/* Initialize statistics */
566 	em_clear_hw_cntrs(&sc->hw);
567 #if NKSTAT > 0
568 	em_kstat_attach(sc);
569 #endif
570 	sc->hw.get_link_status = 1;
571 	if (!defer)
572 		em_update_link_status(sc);
573 
574 #ifdef EM_DEBUG
575 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
576 #endif
577 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
578 
579 	/* Indicate SOL/IDER usage */
580 	if (em_check_phy_reset_block(&sc->hw))
581 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
582 		    DEVNAME(sc));
583 
584 	/* Identify 82544 on PCI-X */
585 	em_get_bus_info(&sc->hw);
586 	if (sc->hw.bus_type == em_bus_type_pcix &&
587 	    sc->hw.mac_type == em_82544)
588 		sc->pcix_82544 = TRUE;
589         else
590 		sc->pcix_82544 = FALSE;
591 
592 	sc->hw.icp_xxxx_is_link_up = FALSE;
593 
594 	INIT_DEBUGOUT("em_attach: end");
595 	return;
596 
597 err_pci:
598 	em_free_pci_resources(sc);
599 }
600 
601 /*********************************************************************
602  *  Transmit entry point
603  *
604  *  em_start is called by the stack to initiate a transmit.
605  *  The driver will remain in this routine as long as there are
606  *  packets to transmit and transmit resources are available.
607  *  In case resources are not available stack is notified and
608  *  the packet is requeued.
609  **********************************************************************/
610 
611 void
612 em_start(struct ifqueue *ifq)
613 {
614 	struct ifnet *ifp = ifq->ifq_if;
615 	struct em_softc *sc = ifp->if_softc;
616 	u_int head, free, used;
617 	struct mbuf *m;
618 	int post = 0;
619 	struct em_queue *que = sc->queues; /* Use only first queue. */
620 
621 	if (!sc->link_active) {
622 		ifq_purge(ifq);
623 		return;
624 	}
625 
626 	/* calculate free space */
627 	head = que->tx.sc_tx_desc_head;
628 	free = que->tx.sc_tx_desc_tail;
629 	if (free <= head)
630 		free += sc->sc_tx_slots;
631 	free -= head;
632 
633 	if (sc->hw.mac_type != em_82547) {
634 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
635 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
636 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
637 	}
638 
639 	for (;;) {
640 		/* use 2 because cksum setup can use an extra slot */
641 		if (EM_MAX_SCATTER + 2 > free) {
642 			ifq_set_oactive(ifq);
643 			break;
644 		}
645 
646 		m = ifq_dequeue(ifq);
647 		if (m == NULL)
648 			break;
649 
650 		used = em_encap(que, m);
651 		if (used == 0) {
652 			m_freem(m);
653 			continue;
654 		}
655 
656 		KASSERT(used <= free);
657 
658 		free -= used;
659 
660 #if NBPFILTER > 0
661 		/* Send a copy of the frame to the BPF listener */
662 		if (ifp->if_bpf)
663 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
664 #endif
665 
666 		/* Set timeout in case hardware has problems transmitting */
667 		ifp->if_timer = EM_TX_TIMEOUT;
668 
669 		if (sc->hw.mac_type == em_82547) {
670 			int len = m->m_pkthdr.len;
671 
672 			if (sc->link_duplex == HALF_DUPLEX)
673 				em_82547_move_tail_locked(sc);
674 			else {
675 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
676 				    que->tx.sc_tx_desc_head);
677 				em_82547_update_fifo_head(sc, len);
678 			}
679 		}
680 
681 		post = 1;
682 	}
683 
684 	if (sc->hw.mac_type != em_82547) {
685 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
686 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
687 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
688 		/*
689 		 * Advance the Transmit Descriptor Tail (Tdt),
690 		 * this tells the E1000 that this frame is
691 		 * available to transmit.
692 		 */
693 		if (post)
694 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
695 			    que->tx.sc_tx_desc_head);
696 	}
697 }
698 
699 /*********************************************************************
700  *  Ioctl entry point
701  *
702  *  em_ioctl is called when the user wants to configure the
703  *  interface.
704  *
705  *  return 0 on success, positive on failure
706  **********************************************************************/
707 
708 int
709 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
710 {
711 	int		error = 0;
712 	struct ifreq   *ifr = (struct ifreq *) data;
713 	struct em_softc *sc = ifp->if_softc;
714 	int s;
715 
716 	s = splnet();
717 
718 	switch (command) {
719 	case SIOCSIFADDR:
720 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
721 			       "Addr)");
722 		if (!(ifp->if_flags & IFF_UP)) {
723 			ifp->if_flags |= IFF_UP;
724 			em_init(sc);
725 		}
726 		break;
727 
728 	case SIOCSIFFLAGS:
729 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
730 		if (ifp->if_flags & IFF_UP) {
731 			if (ifp->if_flags & IFF_RUNNING)
732 				error = ENETRESET;
733 			else
734 				em_init(sc);
735 		} else {
736 			if (ifp->if_flags & IFF_RUNNING)
737 				em_stop(sc, 0);
738 		}
739 		break;
740 
741 	case SIOCSIFMEDIA:
742 		/* Check SOL/IDER usage */
743 		if (em_check_phy_reset_block(&sc->hw)) {
744 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
745 			    DEVNAME(sc));
746 			break;
747 		}
748 	case SIOCGIFMEDIA:
749 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
750 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
751 		break;
752 
753 	case SIOCGIFRXR:
754 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
755 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
756 		break;
757 
758 	default:
759 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
760 	}
761 
762 	if (error == ENETRESET) {
763 		if (ifp->if_flags & IFF_RUNNING) {
764 			em_disable_intr(sc);
765 			em_iff(sc);
766 			if (sc->hw.mac_type == em_82542_rev2_0)
767 				em_initialize_receive_unit(sc);
768 			em_enable_intr(sc);
769 		}
770 		error = 0;
771 	}
772 
773 	splx(s);
774 	return (error);
775 }
776 
777 /*********************************************************************
778  *  Watchdog entry point
779  *
780  *  This routine is called whenever hardware quits transmitting.
781  *
782  **********************************************************************/
783 
784 void
785 em_watchdog(struct ifnet *ifp)
786 {
787 	struct em_softc *sc = ifp->if_softc;
788 	struct em_queue *que = sc->queues; /* Use only first queue. */
789 
790 
791 	/* If we are in this routine because of pause frames, then
792 	 * don't reset the hardware.
793 	 */
794 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
795 		ifp->if_timer = EM_TX_TIMEOUT;
796 		return;
797 	}
798 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
799 	    DEVNAME(sc),
800 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
801 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
802 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
803 
804 	em_init(sc);
805 
806 	sc->watchdog_events++;
807 }
808 
809 /*********************************************************************
810  *  Init entry point
811  *
812  *  This routine is used in two ways. It is used by the stack as
813  *  init entry point in network interface structure. It is also used
814  *  by the driver as a hw/sw initialization routine to get to a
815  *  consistent state.
816  *
817  **********************************************************************/
818 
819 void
820 em_init(void *arg)
821 {
822 	struct em_softc *sc = arg;
823 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
824 	uint32_t	pba;
825 	int s;
826 
827 	s = splnet();
828 
829 	INIT_DEBUGOUT("em_init: begin");
830 
831 	em_stop(sc, 0);
832 
833 	/*
834 	 * Packet Buffer Allocation (PBA)
835 	 * Writing PBA sets the receive portion of the buffer
836 	 * the remainder is used for the transmit buffer.
837 	 *
838 	 * Devices before the 82547 had a Packet Buffer of 64K.
839 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
840 	 * After the 82547 the buffer was reduced to 40K.
841 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
842 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
843 	 */
844 	switch (sc->hw.mac_type) {
845 	case em_82547:
846 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
847 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
848 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
849 		else
850 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
851 		sc->tx_fifo_head = 0;
852 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
853 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
854 		break;
855 	case em_82571:
856 	case em_82572: /* Total Packet Buffer on these is 48k */
857 	case em_82575:
858 	case em_82576:
859 	case em_82580:
860 	case em_80003es2lan:
861 	case em_i350:
862 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
863 		break;
864 	case em_i210:
865 		pba = E1000_PBA_34K;
866 		break;
867 	case em_82573: /* 82573: Total Packet Buffer is 32K */
868 		/* Jumbo frames not supported */
869 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
870 		break;
871 	case em_82574: /* Total Packet Buffer is 40k */
872 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
873 		break;
874 	case em_ich8lan:
875 		pba = E1000_PBA_8K;
876 		break;
877 	case em_ich9lan:
878 	case em_ich10lan:
879 		/* Boost Receive side for jumbo frames */
880 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
881 			pba = E1000_PBA_14K;
882 		else
883 			pba = E1000_PBA_10K;
884 		break;
885 	case em_pchlan:
886 	case em_pch2lan:
887 	case em_pch_lpt:
888 	case em_pch_spt:
889 	case em_pch_cnp:
890 		pba = E1000_PBA_26K;
891 		break;
892 	default:
893 		/* Devices before 82547 had a Packet Buffer of 64K.   */
894 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
895 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
896 		else
897 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
898 	}
899 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
900 	E1000_WRITE_REG(&sc->hw, PBA, pba);
901 
902 	/* Get the latest mac address, User can use a LAA */
903 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
904 
905 	/* Initialize the hardware */
906 	if (em_hardware_init(sc)) {
907 		printf("%s: Unable to initialize the hardware\n",
908 		       DEVNAME(sc));
909 		splx(s);
910 		return;
911 	}
912 	em_update_link_status(sc);
913 
914 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
915 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
916 		em_enable_hw_vlans(sc);
917 
918 	/* Prepare transmit descriptors and buffers */
919 	if (em_setup_transmit_structures(sc)) {
920 		printf("%s: Could not setup transmit structures\n",
921 		       DEVNAME(sc));
922 		em_stop(sc, 0);
923 		splx(s);
924 		return;
925 	}
926 	em_initialize_transmit_unit(sc);
927 
928 	/* Prepare receive descriptors and buffers */
929 	if (em_setup_receive_structures(sc)) {
930 		printf("%s: Could not setup receive structures\n",
931 		       DEVNAME(sc));
932 		em_stop(sc, 0);
933 		splx(s);
934 		return;
935 	}
936 	em_initialize_receive_unit(sc);
937 
938 #ifndef SMALL_KERNEL
939 	if (sc->msix) {
940 		if (em_setup_queues_msix(sc)) {
941 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
942 			splx(s);
943 			return;
944 		}
945 	}
946 #endif
947 
948 	/* Program promiscuous mode and multicast filters. */
949 	em_iff(sc);
950 
951 	ifp->if_flags |= IFF_RUNNING;
952 	ifq_clr_oactive(&ifp->if_snd);
953 
954 	timeout_add_sec(&sc->timer_handle, 1);
955 	em_clear_hw_cntrs(&sc->hw);
956 	em_enable_intr(sc);
957 
958 	/* Don't reset the phy next time init gets called */
959 	sc->hw.phy_reset_disable = TRUE;
960 
961 	splx(s);
962 }
963 
964 /*********************************************************************
965  *
966  *  Interrupt Service routine
967  *
968  **********************************************************************/
969 int
970 em_intr(void *arg)
971 {
972 	struct em_softc	*sc = arg;
973 	struct em_queue *que = sc->queues; /* single queue */
974 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
975 	u_int32_t	reg_icr, test_icr;
976 
977 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
978 	if (sc->hw.mac_type >= em_82571)
979 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
980 	if (!test_icr)
981 		return (0);
982 
983 	if (ifp->if_flags & IFF_RUNNING) {
984 		em_txeof(que);
985 		if (em_rxeof(que))
986 			em_rxrefill(que);
987 	}
988 
989 	/* Link status change */
990 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
991 		KERNEL_LOCK();
992 		sc->hw.get_link_status = 1;
993 		em_check_for_link(&sc->hw);
994 		em_update_link_status(sc);
995 		KERNEL_UNLOCK();
996 	}
997 
998 	return (1);
999 }
1000 
1001 /*********************************************************************
1002  *
1003  *  Media Ioctl callback
1004  *
1005  *  This routine is called whenever the user queries the status of
1006  *  the interface using ifconfig.
1007  *
1008  **********************************************************************/
1009 void
1010 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1011 {
1012 	struct em_softc *sc = ifp->if_softc;
1013 	uint64_t fiber_type = IFM_1000_SX;
1014 	u_int16_t gsr;
1015 
1016 	INIT_DEBUGOUT("em_media_status: begin");
1017 
1018 	em_check_for_link(&sc->hw);
1019 	em_update_link_status(sc);
1020 
1021 	ifmr->ifm_status = IFM_AVALID;
1022 	ifmr->ifm_active = IFM_ETHER;
1023 
1024 	if (!sc->link_active) {
1025 		ifmr->ifm_active |= IFM_NONE;
1026 		return;
1027 	}
1028 
1029 	ifmr->ifm_status |= IFM_ACTIVE;
1030 
1031 	if (sc->hw.media_type == em_media_type_fiber ||
1032 	    sc->hw.media_type == em_media_type_internal_serdes) {
1033 		if (sc->hw.mac_type == em_82545)
1034 			fiber_type = IFM_1000_LX;
1035 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1036 	} else {
1037 		switch (sc->link_speed) {
1038 		case 10:
1039 			ifmr->ifm_active |= IFM_10_T;
1040 			break;
1041 		case 100:
1042 			ifmr->ifm_active |= IFM_100_TX;
1043 			break;
1044 		case 1000:
1045 			ifmr->ifm_active |= IFM_1000_T;
1046 			break;
1047 		}
1048 
1049 		if (sc->link_duplex == FULL_DUPLEX)
1050 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1051 		else
1052 			ifmr->ifm_active |= IFM_HDX;
1053 
1054 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1055 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1056 			if (gsr & SR_1000T_MS_CONFIG_RES)
1057 				ifmr->ifm_active |= IFM_ETH_MASTER;
1058 		}
1059 	}
1060 }
1061 
1062 /*********************************************************************
1063  *
1064  *  Media Ioctl callback
1065  *
1066  *  This routine is called when the user changes speed/duplex using
1067  *  media/mediopt option with ifconfig.
1068  *
1069  **********************************************************************/
1070 int
1071 em_media_change(struct ifnet *ifp)
1072 {
1073 	struct em_softc *sc = ifp->if_softc;
1074 	struct ifmedia	*ifm = &sc->media;
1075 
1076 	INIT_DEBUGOUT("em_media_change: begin");
1077 
1078 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1079 		return (EINVAL);
1080 
1081 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1082 	case IFM_AUTO:
1083 		sc->hw.autoneg = DO_AUTO_NEG;
1084 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1085 		break;
1086 	case IFM_1000_LX:
1087 	case IFM_1000_SX:
1088 	case IFM_1000_T:
1089 		sc->hw.autoneg = DO_AUTO_NEG;
1090 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1091 		break;
1092 	case IFM_100_TX:
1093 		sc->hw.autoneg = FALSE;
1094 		sc->hw.autoneg_advertised = 0;
1095 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1096 			sc->hw.forced_speed_duplex = em_100_full;
1097 		else
1098 			sc->hw.forced_speed_duplex = em_100_half;
1099 		break;
1100 	case IFM_10_T:
1101 		sc->hw.autoneg = FALSE;
1102 		sc->hw.autoneg_advertised = 0;
1103 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1104 			sc->hw.forced_speed_duplex = em_10_full;
1105 		else
1106 			sc->hw.forced_speed_duplex = em_10_half;
1107 		break;
1108 	default:
1109 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1110 	}
1111 
1112 	/*
1113 	 * As the speed/duplex settings may have changed we need to
1114 	 * reset the PHY.
1115 	 */
1116 	sc->hw.phy_reset_disable = FALSE;
1117 
1118 	em_init(sc);
1119 
1120 	return (0);
1121 }
1122 
1123 uint64_t
1124 em_flowstatus(struct em_softc *sc)
1125 {
1126 	u_int16_t ar, lpar;
1127 
1128 	if (sc->hw.media_type == em_media_type_fiber ||
1129 	    sc->hw.media_type == em_media_type_internal_serdes)
1130 		return (0);
1131 
1132 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1133 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1134 
1135 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1136 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1137 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1138 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1139 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1140 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1141 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1142 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1143 
1144 	return (0);
1145 }
1146 
1147 /*********************************************************************
1148  *
1149  *  This routine maps the mbufs to tx descriptors.
1150  *
1151  *  return 0 on success, positive on failure
1152  **********************************************************************/
1153 u_int
1154 em_encap(struct em_queue *que, struct mbuf *m)
1155 {
1156 	struct em_softc *sc = que->sc;
1157 	struct em_packet *pkt;
1158 	struct em_tx_desc *desc;
1159 	bus_dmamap_t map;
1160 	u_int32_t txd_upper, txd_lower;
1161 	u_int head, last, used = 0;
1162 	int i, j;
1163 
1164 	/* For 82544 Workaround */
1165 	DESC_ARRAY		desc_array;
1166 	u_int32_t		array_elements;
1167 
1168 	/* get a dmamap for this packet from the next free slot */
1169 	head = que->tx.sc_tx_desc_head;
1170 	pkt = &que->tx.sc_tx_pkts_ring[head];
1171 	map = pkt->pkt_map;
1172 
1173 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1174 	case 0:
1175 		break;
1176 	case EFBIG:
1177 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1178 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1179 		     BUS_DMA_NOWAIT) == 0)
1180 			break;
1181 
1182 		/* FALLTHROUGH */
1183 	default:
1184 		sc->no_tx_dma_setup++;
1185 		return (0);
1186 	}
1187 
1188 	bus_dmamap_sync(sc->sc_dmat, map,
1189 	    0, map->dm_mapsize,
1190 	    BUS_DMASYNC_PREWRITE);
1191 
1192 	if (sc->hw.mac_type == em_82547) {
1193 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1194 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1195 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1196 	}
1197 
1198 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1199 	    sc->hw.mac_type != em_82576 &&
1200 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1201 	    sc->hw.mac_type != em_i350) {
1202 		used += em_transmit_checksum_setup(que, m, head,
1203 		    &txd_upper, &txd_lower);
1204 	} else {
1205 		txd_upper = txd_lower = 0;
1206 	}
1207 
1208 	head += used;
1209 	if (head >= sc->sc_tx_slots)
1210 		head -= sc->sc_tx_slots;
1211 
1212 	for (i = 0; i < map->dm_nsegs; i++) {
1213 		/* If sc is 82544 and on PCI-X bus */
1214 		if (sc->pcix_82544) {
1215 			/*
1216 			 * Check the Address and Length combination and
1217 			 * split the data accordingly
1218 			 */
1219 			array_elements = em_fill_descriptors(
1220 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1221 			    &desc_array);
1222 			for (j = 0; j < array_elements; j++) {
1223 				desc = &que->tx.sc_tx_desc_ring[head];
1224 
1225 				desc->buffer_addr = htole64(
1226 					desc_array.descriptor[j].address);
1227 				desc->lower.data = htole32(
1228 					(que->tx.sc_txd_cmd | txd_lower |
1229 					 (u_int16_t)desc_array.descriptor[j].length));
1230 				desc->upper.data = htole32(txd_upper);
1231 
1232 				last = head;
1233 				if (++head == sc->sc_tx_slots)
1234 					head = 0;
1235 
1236 				used++;
1237 			}
1238 		} else {
1239 			desc = &que->tx.sc_tx_desc_ring[head];
1240 
1241 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1242 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1243 			    txd_lower | map->dm_segs[i].ds_len);
1244 			desc->upper.data = htole32(txd_upper);
1245 
1246 			last = head;
1247 			if (++head == sc->sc_tx_slots)
1248 	        		head = 0;
1249 
1250 			used++;
1251 		}
1252 	}
1253 
1254 #if NVLAN > 0
1255 	/* Find out if we are in VLAN mode */
1256 	if (m->m_flags & M_VLANTAG) {
1257 		/* Set the VLAN id */
1258 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1259 
1260 		/* Tell hardware to add tag */
1261 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1262 	}
1263 #endif
1264 
1265 	/* mark the packet with the mbuf and last desc slot */
1266 	pkt->pkt_m = m;
1267 	pkt->pkt_eop = last;
1268 
1269 	que->tx.sc_tx_desc_head = head;
1270 
1271 	/*
1272 	 * Last Descriptor of Packet
1273 	 * needs End Of Packet (EOP)
1274 	 * and Report Status (RS)
1275 	 */
1276 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1277 
1278 	if (sc->hw.mac_type == em_82547) {
1279 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1280 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1281 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1282 	}
1283 
1284 	return (used);
1285 }
1286 
1287 /*********************************************************************
1288  *
1289  * 82547 workaround to avoid controller hang in half-duplex environment.
1290  * The workaround is to avoid queuing a large packet that would span
1291  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1292  * in this case. We do that only when FIFO is quiescent.
1293  *
1294  **********************************************************************/
1295 void
1296 em_82547_move_tail_locked(struct em_softc *sc)
1297 {
1298 	uint16_t hw_tdt;
1299 	uint16_t sw_tdt;
1300 	struct em_tx_desc *tx_desc;
1301 	uint16_t length = 0;
1302 	boolean_t eop = 0;
1303 	struct em_queue *que = sc->queues; /* single queue chip */
1304 
1305 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1306 	sw_tdt = que->tx.sc_tx_desc_head;
1307 
1308 	while (hw_tdt != sw_tdt) {
1309 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1310 		length += tx_desc->lower.flags.length;
1311 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1312 		if (++hw_tdt == sc->sc_tx_slots)
1313 			hw_tdt = 0;
1314 
1315 		if (eop) {
1316 			if (em_82547_fifo_workaround(sc, length)) {
1317 				sc->tx_fifo_wrk_cnt++;
1318 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1319 				break;
1320 			}
1321 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1322 			em_82547_update_fifo_head(sc, length);
1323 			length = 0;
1324 		}
1325 	}
1326 }
1327 
1328 void
1329 em_82547_move_tail(void *arg)
1330 {
1331 	struct em_softc *sc = arg;
1332 	int s;
1333 
1334 	s = splnet();
1335 	em_82547_move_tail_locked(sc);
1336 	splx(s);
1337 }
1338 
1339 int
1340 em_82547_fifo_workaround(struct em_softc *sc, int len)
1341 {
1342 	int fifo_space, fifo_pkt_len;
1343 
1344 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1345 
1346 	if (sc->link_duplex == HALF_DUPLEX) {
1347 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1348 
1349 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1350 			if (em_82547_tx_fifo_reset(sc))
1351 				return (0);
1352 			else
1353 				return (1);
1354 		}
1355 	}
1356 
1357 	return (0);
1358 }
1359 
1360 void
1361 em_82547_update_fifo_head(struct em_softc *sc, int len)
1362 {
1363 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1364 
1365 	/* tx_fifo_head is always 16 byte aligned */
1366 	sc->tx_fifo_head += fifo_pkt_len;
1367 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1368 		sc->tx_fifo_head -= sc->tx_fifo_size;
1369 }
1370 
1371 int
1372 em_82547_tx_fifo_reset(struct em_softc *sc)
1373 {
1374 	uint32_t tctl;
1375 	struct em_queue *que = sc->queues; /* single queue chip */
1376 
1377 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1378 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1379 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1380 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1381 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1382 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1383 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1384 
1385 		/* Disable TX unit */
1386 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1387 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1388 
1389 		/* Reset FIFO pointers */
1390 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1391 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1392 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1393 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1394 
1395 		/* Re-enable TX unit */
1396 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1397 		E1000_WRITE_FLUSH(&sc->hw);
1398 
1399 		sc->tx_fifo_head = 0;
1400 		sc->tx_fifo_reset_cnt++;
1401 
1402 		return (TRUE);
1403 	} else
1404 		return (FALSE);
1405 }
1406 
1407 void
1408 em_iff(struct em_softc *sc)
1409 {
1410 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1411 	struct arpcom *ac = &sc->sc_ac;
1412 	u_int32_t reg_rctl = 0;
1413 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1414 	struct ether_multi *enm;
1415 	struct ether_multistep step;
1416 	int i = 0;
1417 
1418 	IOCTL_DEBUGOUT("em_iff: begin");
1419 
1420 	if (sc->hw.mac_type == em_82542_rev2_0) {
1421 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1422 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1423 			em_pci_clear_mwi(&sc->hw);
1424 		reg_rctl |= E1000_RCTL_RST;
1425 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1426 		msec_delay(5);
1427 	}
1428 
1429 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1430 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1431 	ifp->if_flags &= ~IFF_ALLMULTI;
1432 
1433 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1434 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1435 		ifp->if_flags |= IFF_ALLMULTI;
1436 		reg_rctl |= E1000_RCTL_MPE;
1437 		if (ifp->if_flags & IFF_PROMISC)
1438 			reg_rctl |= E1000_RCTL_UPE;
1439 	} else {
1440 		ETHER_FIRST_MULTI(step, ac, enm);
1441 		while (enm != NULL) {
1442 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1443 			i += ETH_LENGTH_OF_ADDRESS;
1444 
1445 			ETHER_NEXT_MULTI(step, enm);
1446 		}
1447 
1448 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1449 	}
1450 
1451 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1452 
1453 	if (sc->hw.mac_type == em_82542_rev2_0) {
1454 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1455 		reg_rctl &= ~E1000_RCTL_RST;
1456 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1457 		msec_delay(5);
1458 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1459 			em_pci_set_mwi(&sc->hw);
1460 	}
1461 }
1462 
1463 /*********************************************************************
1464  *  Timer routine
1465  *
1466  *  This routine checks for link status and updates statistics.
1467  *
1468  **********************************************************************/
1469 
1470 void
1471 em_local_timer(void *arg)
1472 {
1473 	struct em_softc *sc = arg;
1474 	int s;
1475 
1476 	timeout_add_sec(&sc->timer_handle, 1);
1477 
1478 	s = splnet();
1479 	em_smartspeed(sc);
1480 	splx(s);
1481 
1482 #if NKSTAT > 0
1483 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1484 		em_kstat_read(sc->kstat);
1485 		mtx_leave(&sc->kstat_mtx);
1486 	}
1487 #endif
1488 }
1489 
1490 void
1491 em_update_link_status(struct em_softc *sc)
1492 {
1493 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1494 	u_char link_state;
1495 
1496 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1497 		if (sc->link_active == 0) {
1498 			em_get_speed_and_duplex(&sc->hw,
1499 						&sc->link_speed,
1500 						&sc->link_duplex);
1501 			/* Check if we may set SPEED_MODE bit on PCI-E */
1502 			if ((sc->link_speed == SPEED_1000) &&
1503 			    ((sc->hw.mac_type == em_82571) ||
1504 			    (sc->hw.mac_type == em_82572) ||
1505 			    (sc->hw.mac_type == em_82575) ||
1506 			    (sc->hw.mac_type == em_82576) ||
1507 			    (sc->hw.mac_type == em_82580))) {
1508 				int tarc0;
1509 
1510 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1511 				tarc0 |= SPEED_MODE_BIT;
1512 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1513 			}
1514 			sc->link_active = 1;
1515 			sc->smartspeed = 0;
1516 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1517 		}
1518 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1519 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1520 	} else {
1521 		if (sc->link_active == 1) {
1522 			ifp->if_baudrate = sc->link_speed = 0;
1523 			sc->link_duplex = 0;
1524 			sc->link_active = 0;
1525 		}
1526 		link_state = LINK_STATE_DOWN;
1527 	}
1528 	if (ifp->if_link_state != link_state) {
1529 		ifp->if_link_state = link_state;
1530 		if_link_state_change(ifp);
1531 	}
1532 }
1533 
1534 /*********************************************************************
1535  *
1536  *  This routine disables all traffic on the adapter by issuing a
1537  *  global reset on the MAC and deallocates TX/RX buffers.
1538  *
1539  **********************************************************************/
1540 
1541 void
1542 em_stop(void *arg, int softonly)
1543 {
1544 	struct em_softc *sc = arg;
1545 	struct em_queue *que = sc->queues; /* Use only first queue. */
1546 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1547 
1548 	/* Tell the stack that the interface is no longer active */
1549 	ifp->if_flags &= ~IFF_RUNNING;
1550 
1551 	INIT_DEBUGOUT("em_stop: begin");
1552 
1553 	timeout_del(&que->rx_refill);
1554 	timeout_del(&sc->timer_handle);
1555 	timeout_del(&sc->tx_fifo_timer_handle);
1556 
1557 	if (!softonly)
1558 		em_disable_intr(sc);
1559 	if (sc->hw.mac_type >= em_pch_spt)
1560 		em_flush_desc_rings(sc);
1561 	if (!softonly)
1562 		em_reset_hw(&sc->hw);
1563 
1564 	intr_barrier(sc->sc_intrhand);
1565 	ifq_barrier(&ifp->if_snd);
1566 
1567 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1568 
1569 	ifq_clr_oactive(&ifp->if_snd);
1570 	ifp->if_timer = 0;
1571 
1572 	em_free_transmit_structures(sc);
1573 	em_free_receive_structures(sc);
1574 }
1575 
1576 /*********************************************************************
1577  *
1578  *  Determine hardware revision.
1579  *
1580  **********************************************************************/
1581 void
1582 em_identify_hardware(struct em_softc *sc)
1583 {
1584 	u_int32_t reg;
1585 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1586 
1587 	/* Make sure our PCI config space has the necessary stuff set */
1588 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1589 					    PCI_COMMAND_STATUS_REG);
1590 
1591 	/* Save off the information about this board */
1592 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1593 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1594 
1595 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1596 	sc->hw.revision_id = PCI_REVISION(reg);
1597 
1598 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1599 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1600 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1601 
1602 	/* Identify the MAC */
1603 	if (em_set_mac_type(&sc->hw))
1604 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1605 
1606 	if (sc->hw.mac_type == em_pchlan)
1607 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1608 
1609 	if (sc->hw.mac_type == em_82541 ||
1610 	    sc->hw.mac_type == em_82541_rev_2 ||
1611 	    sc->hw.mac_type == em_82547 ||
1612 	    sc->hw.mac_type == em_82547_rev_2)
1613 		sc->hw.phy_init_script = TRUE;
1614 }
1615 
1616 void
1617 em_legacy_irq_quirk_spt(struct em_softc *sc)
1618 {
1619 	uint32_t	reg;
1620 
1621 	/* Legacy interrupt: SPT needs a quirk. */
1622 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp)
1623 		return;
1624 	if (sc->legacy_irq == 0)
1625 		return;
1626 
1627 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1628 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1629 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1630 
1631 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1632 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1633 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1634 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1635 }
1636 
1637 int
1638 em_allocate_pci_resources(struct em_softc *sc)
1639 {
1640 	int		val, rid;
1641 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1642 	struct em_queue	       *que = NULL;
1643 
1644 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1645 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1646 		printf(": mmba is not mem space\n");
1647 		return (ENXIO);
1648 	}
1649 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1650 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1651 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1652 		printf(": cannot find mem space\n");
1653 		return (ENXIO);
1654 	}
1655 
1656 	switch (sc->hw.mac_type) {
1657 	case em_82544:
1658 	case em_82540:
1659 	case em_82545:
1660 	case em_82546:
1661 	case em_82541:
1662 	case em_82541_rev_2:
1663 		/* Figure out where our I/O BAR is ? */
1664 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1665 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1666 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1667 				sc->io_rid = rid;
1668 				break;
1669 			}
1670 			rid += 4;
1671 			if (PCI_MAPREG_MEM_TYPE(val) ==
1672 			    PCI_MAPREG_MEM_TYPE_64BIT)
1673 				rid += 4;	/* skip high bits, too */
1674 		}
1675 
1676 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1677 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1678 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1679 			printf(": cannot find i/o space\n");
1680 			return (ENXIO);
1681 		}
1682 
1683 		sc->hw.io_base = 0;
1684 		break;
1685 	default:
1686 		break;
1687 	}
1688 
1689 	sc->osdep.em_flashoffset = 0;
1690 	/* for ICH8 and family we need to find the flash memory */
1691 	if (sc->hw.mac_type >= em_pch_spt) {
1692 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1693 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1694 		sc->osdep.em_flashbase = 0;
1695 		sc->osdep.em_flashsize = 0;
1696 		sc->osdep.em_flashoffset = 0xe000;
1697 	} else if (IS_ICH8(sc->hw.mac_type)) {
1698 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1699 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1700 			printf(": flash is not mem space\n");
1701 			return (ENXIO);
1702 		}
1703 
1704 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1705 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1706 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1707 			printf(": cannot find mem space\n");
1708 			return (ENXIO);
1709 		}
1710         }
1711 
1712 	sc->osdep.dev = (struct device *)sc;
1713 	sc->hw.back = &sc->osdep;
1714 
1715 	/* Only one queue for the moment. */
1716 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1717 	if (que == NULL) {
1718 		printf(": unable to allocate queue memory\n");
1719 		return (ENOMEM);
1720 	}
1721 	que->me = 0;
1722 	que->sc = sc;
1723 	timeout_set(&que->rx_refill, em_rxrefill, que);
1724 
1725 	sc->queues = que;
1726 	sc->num_queues = 1;
1727 	sc->msix = 0;
1728 	sc->legacy_irq = 0;
1729 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1730 		return (ENXIO);
1731 
1732 	/*
1733 	 * the ICP_xxxx device has multiple, duplicate register sets for
1734 	 * use when it is being used as a network processor. Disable those
1735 	 * registers here, as they are not necessary in this context and
1736 	 * can confuse the system
1737 	 */
1738 	if(sc->hw.mac_type == em_icp_xxxx) {
1739 		int offset;
1740 		pcireg_t val;
1741 
1742 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1743 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1744 			return (0);
1745 		}
1746 		offset += PCI_ST_SMIA_OFFSET;
1747 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1748 		    offset, 0x06);
1749 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1750 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1751 	}
1752 	return (0);
1753 }
1754 
1755 void
1756 em_free_pci_resources(struct em_softc *sc)
1757 {
1758 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1759 	pci_chipset_tag_t	pc = pa->pa_pc;
1760 	struct em_queue	       *que = NULL;
1761 	if (sc->sc_intrhand)
1762 		pci_intr_disestablish(pc, sc->sc_intrhand);
1763 	sc->sc_intrhand = 0;
1764 
1765 	if (sc->osdep.em_flashbase)
1766 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1767 				sc->osdep.em_flashsize);
1768 	sc->osdep.em_flashbase = 0;
1769 
1770 	if (sc->osdep.em_iobase)
1771 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1772 				sc->osdep.em_iosize);
1773 	sc->osdep.em_iobase = 0;
1774 
1775 	if (sc->osdep.em_membase)
1776 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1777 				sc->osdep.em_memsize);
1778 	sc->osdep.em_membase = 0;
1779 
1780 	FOREACH_QUEUE(sc, que) {
1781 		if (que->rx.sc_rx_desc_ring != NULL) {
1782 			que->rx.sc_rx_desc_ring = NULL;
1783 			em_dma_free(sc, &que->rx.sc_rx_dma);
1784 		}
1785 		if (que->tx.sc_tx_desc_ring != NULL) {
1786 			que->tx.sc_tx_desc_ring = NULL;
1787 			em_dma_free(sc, &que->tx.sc_tx_dma);
1788 		}
1789 		if (que->tag)
1790 			pci_intr_disestablish(pc, que->tag);
1791 		que->tag = NULL;
1792 		que->eims = 0;
1793 		que->me = 0;
1794 		que->sc = NULL;
1795 	}
1796 	sc->legacy_irq = 0;
1797 	sc->msix_linkvec = 0;
1798 	sc->msix_queuesmask = 0;
1799 	if (sc->queues)
1800 		free(sc->queues, M_DEVBUF,
1801 		    sc->num_queues * sizeof(struct em_queue));
1802 	sc->num_queues = 0;
1803 	sc->queues = NULL;
1804 }
1805 
1806 /*********************************************************************
1807  *
1808  *  Initialize the hardware to a configuration as specified by the
1809  *  em_softc structure. The controller is reset, the EEPROM is
1810  *  verified, the MAC address is set, then the shared initialization
1811  *  routines are called.
1812  *
1813  **********************************************************************/
1814 int
1815 em_hardware_init(struct em_softc *sc)
1816 {
1817 	uint32_t ret_val;
1818 	u_int16_t rx_buffer_size;
1819 
1820 	INIT_DEBUGOUT("em_hardware_init: begin");
1821 	if (sc->hw.mac_type >= em_pch_spt)
1822 		em_flush_desc_rings(sc);
1823 	/* Issue a global reset */
1824 	em_reset_hw(&sc->hw);
1825 
1826 	/* When hardware is reset, fifo_head is also reset */
1827 	sc->tx_fifo_head = 0;
1828 
1829 	/* Make sure we have a good EEPROM before we read from it */
1830 	if (em_get_flash_presence_i210(&sc->hw) &&
1831 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1832 		/*
1833 		 * Some PCIe parts fail the first check due to
1834 		 * the link being in sleep state, call it again,
1835 		 * if it fails a second time its a real issue.
1836 		 */
1837 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1838 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1839 			       DEVNAME(sc));
1840 			return (EIO);
1841 		}
1842 	}
1843 
1844 	if (em_get_flash_presence_i210(&sc->hw) &&
1845 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1846 		printf("%s: EEPROM read error while reading part number\n",
1847 		       DEVNAME(sc));
1848 		return (EIO);
1849 	}
1850 
1851 	/* Set up smart power down as default off on newer adapters */
1852 	if (!em_smart_pwr_down &&
1853 	     (sc->hw.mac_type == em_82571 ||
1854 	      sc->hw.mac_type == em_82572 ||
1855 	      sc->hw.mac_type == em_82575 ||
1856 	      sc->hw.mac_type == em_82576 ||
1857 	      sc->hw.mac_type == em_82580 ||
1858 	      sc->hw.mac_type == em_i210 ||
1859 	      sc->hw.mac_type == em_i350 )) {
1860 		uint16_t phy_tmp = 0;
1861 
1862 		/* Speed up time to link by disabling smart power down */
1863 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1864 		phy_tmp &= ~IGP02E1000_PM_SPD;
1865 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1866 	}
1867 
1868 	em_legacy_irq_quirk_spt(sc);
1869 
1870 	/*
1871 	 * These parameters control the automatic generation (Tx) and
1872 	 * response (Rx) to Ethernet PAUSE frames.
1873 	 * - High water mark should allow for at least two frames to be
1874 	 *   received after sending an XOFF.
1875 	 * - Low water mark works best when it is very near the high water mark.
1876 	 *   This allows the receiver to restart by sending XON when it has
1877 	 *   drained a bit.  Here we use an arbitary value of 1500 which will
1878 	 *   restart after one full frame is pulled from the buffer.  There
1879 	 *   could be several smaller frames in the buffer and if so they will
1880 	 *   not trigger the XON until their total number reduces the buffer
1881 	 *   by 1500.
1882 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1883 	 */
1884 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1885 
1886 	sc->hw.fc_high_water = rx_buffer_size -
1887 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1888 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1889 	if (sc->hw.mac_type == em_80003es2lan)
1890 		sc->hw.fc_pause_time = 0xFFFF;
1891 	else
1892 		sc->hw.fc_pause_time = 1000;
1893 	sc->hw.fc_send_xon = TRUE;
1894 	sc->hw.fc = E1000_FC_FULL;
1895 
1896 	em_disable_aspm(sc);
1897 
1898 	if ((ret_val = em_init_hw(sc)) != 0) {
1899 		if (ret_val == E1000_DEFER_INIT) {
1900 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1901 			return (EAGAIN);
1902 		}
1903 		printf("\n%s: Hardware Initialization Failed: %d\n",
1904 		       DEVNAME(sc), ret_val);
1905 		return (EIO);
1906 	}
1907 
1908 	em_check_for_link(&sc->hw);
1909 
1910 	return (0);
1911 }
1912 
1913 /*********************************************************************
1914  *
1915  *  Setup networking device structure and register an interface.
1916  *
1917  **********************************************************************/
1918 void
1919 em_setup_interface(struct em_softc *sc)
1920 {
1921 	struct ifnet   *ifp;
1922 	uint64_t fiber_type = IFM_1000_SX;
1923 
1924 	INIT_DEBUGOUT("em_setup_interface: begin");
1925 
1926 	ifp = &sc->sc_ac.ac_if;
1927 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1928 	ifp->if_softc = sc;
1929 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1930 	ifp->if_xflags = IFXF_MPSAFE;
1931 	ifp->if_ioctl = em_ioctl;
1932 	ifp->if_qstart = em_start;
1933 	ifp->if_watchdog = em_watchdog;
1934 	ifp->if_hardmtu =
1935 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1936 	ifq_set_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1937 
1938 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1939 
1940 #if NVLAN > 0
1941 	if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
1942 	    sc->hw.mac_type != em_82576 &&
1943 	    sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
1944 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1945 #endif
1946 
1947 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1948 	    sc->hw.mac_type != em_82576 &&
1949 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1950 	    sc->hw.mac_type != em_i350)
1951 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1952 
1953 	/*
1954 	 * Specify the media types supported by this adapter and register
1955 	 * callbacks to update media and link information
1956 	 */
1957 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1958 		     em_media_status);
1959 	if (sc->hw.media_type == em_media_type_fiber ||
1960 	    sc->hw.media_type == em_media_type_internal_serdes) {
1961 		if (sc->hw.mac_type == em_82545)
1962 			fiber_type = IFM_1000_LX;
1963 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1964 			    0, NULL);
1965 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1966 			    0, NULL);
1967 	} else {
1968 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1969 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1970 			    0, NULL);
1971 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
1972 			    0, NULL);
1973 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1974 			    0, NULL);
1975 		if (sc->hw.phy_type != em_phy_ife) {
1976 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1977 				    0, NULL);
1978 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1979 		}
1980 	}
1981 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1982 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1983 
1984 	if_attach(ifp);
1985 	ether_ifattach(ifp);
1986 	em_enable_intr(sc);
1987 }
1988 
1989 int
1990 em_detach(struct device *self, int flags)
1991 {
1992 	struct em_softc *sc = (struct em_softc *)self;
1993 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1994 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1995 	pci_chipset_tag_t	pc = pa->pa_pc;
1996 
1997 	if (sc->sc_intrhand)
1998 		pci_intr_disestablish(pc, sc->sc_intrhand);
1999 	sc->sc_intrhand = 0;
2000 
2001 	em_stop(sc, 1);
2002 
2003 	em_free_pci_resources(sc);
2004 
2005 	ether_ifdetach(ifp);
2006 	if_detach(ifp);
2007 
2008 	return (0);
2009 }
2010 
2011 int
2012 em_activate(struct device *self, int act)
2013 {
2014 	struct em_softc *sc = (struct em_softc *)self;
2015 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2016 	int rv = 0;
2017 
2018 	switch (act) {
2019 	case DVACT_SUSPEND:
2020 		if (ifp->if_flags & IFF_RUNNING)
2021 			em_stop(sc, 0);
2022 		/* We have no children atm, but we will soon */
2023 		rv = config_activate_children(self, act);
2024 		break;
2025 	case DVACT_RESUME:
2026 		if (ifp->if_flags & IFF_UP)
2027 			em_init(sc);
2028 		break;
2029 	default:
2030 		rv = config_activate_children(self, act);
2031 		break;
2032 	}
2033 	return (rv);
2034 }
2035 
2036 /*********************************************************************
2037  *
2038  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2039  *
2040  **********************************************************************/
2041 void
2042 em_smartspeed(struct em_softc *sc)
2043 {
2044 	uint16_t phy_tmp;
2045 
2046 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2047 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2048 		return;
2049 
2050 	if (sc->smartspeed == 0) {
2051 		/* If Master/Slave config fault is asserted twice,
2052 		 * we assume back-to-back */
2053 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2054 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2055 			return;
2056 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2057 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2058 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2059 					&phy_tmp);
2060 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2061 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2062 				em_write_phy_reg(&sc->hw,
2063 						    PHY_1000T_CTRL, phy_tmp);
2064 				sc->smartspeed++;
2065 				if (sc->hw.autoneg &&
2066 				    !em_phy_setup_autoneg(&sc->hw) &&
2067 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2068 						       &phy_tmp)) {
2069 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2070 						    MII_CR_RESTART_AUTO_NEG);
2071 					em_write_phy_reg(&sc->hw,
2072 							 PHY_CTRL, phy_tmp);
2073 				}
2074 			}
2075 		}
2076 		return;
2077 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2078 		/* If still no link, perhaps using 2/3 pair cable */
2079 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2080 		phy_tmp |= CR_1000T_MS_ENABLE;
2081 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2082 		if (sc->hw.autoneg &&
2083 		    !em_phy_setup_autoneg(&sc->hw) &&
2084 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2085 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2086 				    MII_CR_RESTART_AUTO_NEG);
2087 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2088 		}
2089 	}
2090 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2091 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2092 		sc->smartspeed = 0;
2093 }
2094 
2095 /*
2096  * Manage DMA'able memory.
2097  */
2098 int
2099 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2100 {
2101 	int r;
2102 
2103 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2104 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2105 	if (r != 0)
2106 		return (r);
2107 
2108 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2109 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2110 	if (r != 0)
2111 		goto destroy;
2112 
2113 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2114 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2115 	if (r != 0)
2116 		goto free;
2117 
2118 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2119 	    NULL, BUS_DMA_WAITOK);
2120 	if (r != 0)
2121 		goto unmap;
2122 
2123 	dma->dma_size = size;
2124 	return (0);
2125 
2126 unmap:
2127 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2128 free:
2129 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2130 destroy:
2131 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2132 
2133 	return (r);
2134 }
2135 
2136 void
2137 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2138 {
2139 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2140 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2141 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2142 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2143 }
2144 
2145 /*********************************************************************
2146  *
2147  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2148  *  the information needed to transmit a packet on the wire.
2149  *
2150  **********************************************************************/
2151 int
2152 em_allocate_transmit_structures(struct em_softc *sc)
2153 {
2154 	struct em_queue *que;
2155 
2156 	FOREACH_QUEUE(sc, que) {
2157 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2158 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2159 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2160 
2161 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2162 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2163 		if (que->tx.sc_tx_pkts_ring == NULL) {
2164 			printf("%s: Unable to allocate tx_buffer memory\n",
2165 			    DEVNAME(sc));
2166 			return (ENOMEM);
2167 		}
2168 	}
2169 
2170 	return (0);
2171 }
2172 
2173 /*********************************************************************
2174  *
2175  *  Allocate and initialize transmit structures.
2176  *
2177  **********************************************************************/
2178 int
2179 em_setup_transmit_structures(struct em_softc *sc)
2180 {
2181 	struct em_queue *que;
2182 	struct em_packet *pkt;
2183 	int error, i;
2184 
2185 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2186 		goto fail;
2187 
2188 	FOREACH_QUEUE(sc, que) {
2189 		bzero((void *) que->tx.sc_tx_desc_ring,
2190 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2191 
2192 		for (i = 0; i < sc->sc_tx_slots; i++) {
2193 			pkt = &que->tx.sc_tx_pkts_ring[i];
2194 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2195 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2196 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2197 			if (error != 0) {
2198 				printf("%s: Unable to create TX DMA map\n",
2199 				    DEVNAME(sc));
2200 				goto fail;
2201 			}
2202 		}
2203 
2204 		que->tx.sc_tx_desc_head = 0;
2205 		que->tx.sc_tx_desc_tail = 0;
2206 
2207 		/* Set checksum context */
2208 		que->tx.active_checksum_context = OFFLOAD_NONE;
2209 	}
2210 
2211 	return (0);
2212 
2213 fail:
2214 	em_free_transmit_structures(sc);
2215 	return (error);
2216 }
2217 
2218 /*********************************************************************
2219  *
2220  *  Enable transmit unit.
2221  *
2222  **********************************************************************/
2223 void
2224 em_initialize_transmit_unit(struct em_softc *sc)
2225 {
2226 	u_int32_t	reg_tctl, reg_tipg = 0;
2227 	u_int64_t	bus_addr;
2228 	struct em_queue *que;
2229 
2230 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2231 
2232 	FOREACH_QUEUE(sc, que) {
2233 		/* Setup the Base and Length of the Tx Descriptor Ring */
2234 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2235 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2236 		    sc->sc_tx_slots *
2237 		    sizeof(struct em_tx_desc));
2238 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2239 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2240 
2241 		/* Setup the HW Tx Head and Tail descriptor pointers */
2242 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2243 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2244 
2245 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2246 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2247 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2248 
2249 		/* Set the default values for the Tx Inter Packet Gap timer */
2250 		switch (sc->hw.mac_type) {
2251 		case em_82542_rev2_0:
2252 		case em_82542_rev2_1:
2253 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2254 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2255 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2256 			break;
2257 		case em_80003es2lan:
2258 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2259 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2260 			break;
2261 		default:
2262 			if (sc->hw.media_type == em_media_type_fiber ||
2263 			    sc->hw.media_type == em_media_type_internal_serdes)
2264 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2265 			else
2266 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2267 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2268 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2269 		}
2270 
2271 
2272 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2273 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2274 		if (sc->hw.mac_type >= em_82540)
2275 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2276 
2277 		/* Setup Transmit Descriptor Base Settings */
2278 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2279 
2280 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2281 		    sc->hw.mac_type == em_82576 ||
2282 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2283 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2284 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2285 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2286 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2287 		} else if (sc->tx_int_delay > 0)
2288 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2289 	}
2290 
2291 	/* Program the Transmit Control Register */
2292 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2293 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2294 	if (sc->hw.mac_type >= em_82571)
2295 		reg_tctl |= E1000_TCTL_MULR;
2296 	if (sc->link_duplex == FULL_DUPLEX)
2297 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2298 	else
2299 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2300 	/* This write will effectively turn on the transmit unit */
2301 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2302 
2303 	/* SPT Si errata workaround to avoid data corruption */
2304 
2305 	if (sc->hw.mac_type == em_pch_spt) {
2306 		uint32_t	reg_val;
2307 
2308 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2309 		reg_val |= E1000_RCTL_RDMTS_HEX;
2310 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2311 
2312 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2313 		/* i218-i219 Specification Update 1.5.4.5 */
2314 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2315 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2316 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2317 	}
2318 }
2319 
2320 /*********************************************************************
2321  *
2322  *  Free all transmit related data structures.
2323  *
2324  **********************************************************************/
2325 void
2326 em_free_transmit_structures(struct em_softc *sc)
2327 {
2328 	struct em_queue *que;
2329 	struct em_packet *pkt;
2330 	int i;
2331 
2332 	INIT_DEBUGOUT("free_transmit_structures: begin");
2333 
2334 	FOREACH_QUEUE(sc, que) {
2335 		if (que->tx.sc_tx_pkts_ring != NULL) {
2336 			for (i = 0; i < sc->sc_tx_slots; i++) {
2337 				pkt = &que->tx.sc_tx_pkts_ring[i];
2338 
2339 				if (pkt->pkt_m != NULL) {
2340 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2341 					    0, pkt->pkt_map->dm_mapsize,
2342 					    BUS_DMASYNC_POSTWRITE);
2343 					bus_dmamap_unload(sc->sc_dmat,
2344 					    pkt->pkt_map);
2345 
2346 					m_freem(pkt->pkt_m);
2347 					pkt->pkt_m = NULL;
2348 				}
2349 
2350 				if (pkt->pkt_map != NULL) {
2351 					bus_dmamap_destroy(sc->sc_dmat,
2352 					    pkt->pkt_map);
2353 					pkt->pkt_map = NULL;
2354 				}
2355 			}
2356 
2357 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2358 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2359 			que->tx.sc_tx_pkts_ring = NULL;
2360 		}
2361 
2362 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2363 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2364 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2365 	}
2366 }
2367 
2368 /*********************************************************************
2369  *
2370  *  The offload context needs to be set when we transfer the first
2371  *  packet of a particular protocol (TCP/UDP). We change the
2372  *  context only if the protocol type changes.
2373  *
2374  **********************************************************************/
2375 u_int
2376 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2377     u_int32_t *txd_upper, u_int32_t *txd_lower)
2378 {
2379 	struct em_context_desc *TXD;
2380 
2381 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2382 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2383 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2384 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2385 			return (0);
2386 		else
2387 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2388 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2389 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2390 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2391 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2392 			return (0);
2393 		else
2394 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2395 	} else {
2396 		*txd_upper = 0;
2397 		*txd_lower = 0;
2398 		return (0);
2399 	}
2400 
2401 	/* If we reach this point, the checksum offload context
2402 	 * needs to be reset.
2403 	 */
2404 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2405 
2406 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2407 	TXD->lower_setup.ip_fields.ipcso =
2408 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2409 	TXD->lower_setup.ip_fields.ipcse =
2410 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2411 
2412 	TXD->upper_setup.tcp_fields.tucss =
2413 	    ETHER_HDR_LEN + sizeof(struct ip);
2414 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2415 
2416 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2417 		TXD->upper_setup.tcp_fields.tucso =
2418 		    ETHER_HDR_LEN + sizeof(struct ip) +
2419 		    offsetof(struct tcphdr, th_sum);
2420 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2421 		TXD->upper_setup.tcp_fields.tucso =
2422 		    ETHER_HDR_LEN + sizeof(struct ip) +
2423 		    offsetof(struct udphdr, uh_sum);
2424 	}
2425 
2426 	TXD->tcp_seg_setup.data = htole32(0);
2427 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2428 
2429 	return (1);
2430 }
2431 
2432 /**********************************************************************
2433  *
2434  *  Examine each tx_buffer in the used queue. If the hardware is done
2435  *  processing the packet then free associated resources. The
2436  *  tx_buffer is put back on the free queue.
2437  *
2438  **********************************************************************/
2439 void
2440 em_txeof(struct em_queue *que)
2441 {
2442 	struct em_softc *sc = que->sc;
2443 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2444 	struct em_packet *pkt;
2445 	struct em_tx_desc *desc;
2446 	u_int head, tail;
2447 	u_int free = 0;
2448 
2449 	head = que->tx.sc_tx_desc_head;
2450 	tail = que->tx.sc_tx_desc_tail;
2451 
2452 	if (head == tail)
2453 		return;
2454 
2455 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2456 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2457 	    BUS_DMASYNC_POSTREAD);
2458 
2459 	do {
2460 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2461 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2462 
2463 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2464 			break;
2465 
2466 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2467 		    0, pkt->pkt_map->dm_mapsize,
2468 		    BUS_DMASYNC_POSTWRITE);
2469 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2470 
2471 		KASSERT(pkt->pkt_m != NULL);
2472 
2473 		m_freem(pkt->pkt_m);
2474 		pkt->pkt_m = NULL;
2475 
2476 		tail = pkt->pkt_eop;
2477 
2478 		if (++tail == sc->sc_tx_slots)
2479 			tail = 0;
2480 
2481 		free++;
2482 	} while (tail != head);
2483 
2484 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2485 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2486 	    BUS_DMASYNC_PREREAD);
2487 
2488 	if (free == 0)
2489 		return;
2490 
2491 	que->tx.sc_tx_desc_tail = tail;
2492 
2493 	if (ifq_is_oactive(&ifp->if_snd))
2494 		ifq_restart(&ifp->if_snd);
2495 	else if (tail == head)
2496 		ifp->if_timer = 0;
2497 }
2498 
2499 /*********************************************************************
2500  *
2501  *  Get a buffer from system mbuf buffer pool.
2502  *
2503  **********************************************************************/
2504 int
2505 em_get_buf(struct em_queue *que, int i)
2506 {
2507 	struct em_softc *sc = que->sc;
2508 	struct mbuf    *m;
2509 	struct em_packet *pkt;
2510 	struct em_rx_desc *desc;
2511 	int error;
2512 
2513 	pkt = &que->rx.sc_rx_pkts_ring[i];
2514 	desc = &que->rx.sc_rx_desc_ring[i];
2515 
2516 	KASSERT(pkt->pkt_m == NULL);
2517 
2518 	m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES);
2519 	if (m == NULL) {
2520 		sc->mbuf_cluster_failed++;
2521 		return (ENOBUFS);
2522 	}
2523 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2524 	m_adj(m, ETHER_ALIGN);
2525 
2526 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2527 	    m, BUS_DMA_NOWAIT);
2528 	if (error) {
2529 		m_freem(m);
2530 		return (error);
2531 	}
2532 
2533 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2534 	    0, pkt->pkt_map->dm_mapsize,
2535 	    BUS_DMASYNC_PREREAD);
2536 	pkt->pkt_m = m;
2537 
2538 	memset(desc, 0, sizeof(*desc));
2539 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2540 
2541 	return (0);
2542 }
2543 
2544 /*********************************************************************
2545  *
2546  *  Allocate memory for rx_buffer structures. Since we use one
2547  *  rx_buffer per received packet, the maximum number of rx_buffer's
2548  *  that we'll need is equal to the number of receive descriptors
2549  *  that we've allocated.
2550  *
2551  **********************************************************************/
2552 int
2553 em_allocate_receive_structures(struct em_softc *sc)
2554 {
2555 	struct em_queue *que;
2556 	struct em_packet *pkt;
2557 	int i;
2558 	int error;
2559 
2560 	FOREACH_QUEUE(sc, que) {
2561 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2562 		    sizeof(*que->rx.sc_rx_pkts_ring),
2563 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2564 		if (que->rx.sc_rx_pkts_ring == NULL) {
2565 			printf("%s: Unable to allocate rx_buffer memory\n",
2566 			    DEVNAME(sc));
2567 			return (ENOMEM);
2568 		}
2569 
2570 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2571 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2572 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2573 
2574 		for (i = 0; i < sc->sc_rx_slots; i++) {
2575 			pkt = &que->rx.sc_rx_pkts_ring[i];
2576 
2577 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2578 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2579 			if (error != 0) {
2580 				printf("%s: em_allocate_receive_structures: "
2581 				    "bus_dmamap_create failed; error %u\n",
2582 				    DEVNAME(sc), error);
2583 				goto fail;
2584 			}
2585 
2586 			pkt->pkt_m = NULL;
2587 		}
2588 	}
2589 
2590         return (0);
2591 
2592 fail:
2593 	em_free_receive_structures(sc);
2594 	return (error);
2595 }
2596 
2597 /*********************************************************************
2598  *
2599  *  Allocate and initialize receive structures.
2600  *
2601  **********************************************************************/
2602 int
2603 em_setup_receive_structures(struct em_softc *sc)
2604 {
2605 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2606 	struct em_queue *que;
2607 	u_int lwm;
2608 
2609 	if (em_allocate_receive_structures(sc))
2610 		return (ENOMEM);
2611 
2612 	FOREACH_QUEUE(sc, que) {
2613 		memset(que->rx.sc_rx_desc_ring, 0,
2614 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2615 
2616 		/* Setup our descriptor pointers */
2617 		que->rx.sc_rx_desc_tail = 0;
2618 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2619 
2620 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2621 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2622 
2623 		if (em_rxfill(que) == 0) {
2624 			printf("%s: unable to fill any rx descriptors\n",
2625 			    DEVNAME(sc));
2626 			return (ENOMEM);
2627 		}
2628 	}
2629 
2630 	return (0);
2631 }
2632 
2633 /*********************************************************************
2634  *
2635  *  Enable receive unit.
2636  *
2637  **********************************************************************/
2638 void
2639 em_initialize_receive_unit(struct em_softc *sc)
2640 {
2641 	struct em_queue *que;
2642 	u_int32_t	reg_rctl;
2643 	u_int32_t	reg_rxcsum;
2644 	u_int32_t	reg_srrctl;
2645 	u_int64_t	bus_addr;
2646 
2647 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2648 
2649 	/* Make sure receives are disabled while setting up the descriptor ring */
2650 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2651 
2652 	/* Set the Receive Delay Timer Register */
2653 	E1000_WRITE_REG(&sc->hw, RDTR,
2654 			sc->rx_int_delay | E1000_RDT_FPDB);
2655 
2656 	if (sc->hw.mac_type >= em_82540) {
2657 		if (sc->rx_int_delay)
2658 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2659 
2660 		/* Set the interrupt throttling rate.  Value is calculated
2661 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
2662 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2663 	}
2664 
2665 	/* Setup the Receive Control Register */
2666 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2667 	    E1000_RCTL_RDMTS_HALF |
2668 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2669 
2670 	if (sc->hw.tbi_compatibility_on == TRUE)
2671 		reg_rctl |= E1000_RCTL_SBP;
2672 
2673 	/*
2674 	 * The i350 has a bug where it always strips the CRC whether
2675 	 * asked to or not.  So ask for stripped CRC here and
2676 	 * cope in rxeof
2677 	 */
2678 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2679 		reg_rctl |= E1000_RCTL_SECRC;
2680 
2681 	switch (sc->sc_rx_buffer_len) {
2682 	default:
2683 	case EM_RXBUFFER_2048:
2684 		reg_rctl |= E1000_RCTL_SZ_2048;
2685 		break;
2686 	case EM_RXBUFFER_4096:
2687 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2688 		break;
2689 	case EM_RXBUFFER_8192:
2690 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2691 		break;
2692 	case EM_RXBUFFER_16384:
2693 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2694 		break;
2695 	}
2696 
2697 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2698 		reg_rctl |= E1000_RCTL_LPE;
2699 
2700 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2701 	if (sc->hw.mac_type >= em_82543) {
2702 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2703 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2704 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2705 	}
2706 
2707 	/*
2708 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573
2709 	 * long latencies are observed, like Lenovo X60.
2710 	 */
2711 	if (sc->hw.mac_type == em_82573)
2712 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2713 
2714 	FOREACH_QUEUE(sc, que) {
2715 		if (sc->num_queues > 1) {
2716 			/*
2717 			 * Disable Drop Enable for every queue, default has
2718 			 * it enabled for queues > 0
2719 			 */
2720 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2721 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2722 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2723 		}
2724 
2725 		/* Setup the Base and Length of the Rx Descriptor Ring */
2726 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2727 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2728 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2729 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2730 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2731 
2732 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2733 		    sc->hw.mac_type == em_82576 ||
2734 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2735 			/* 82575/6 need to enable the RX queue */
2736 			uint32_t reg;
2737 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2738 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2739 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2740 		}
2741 	}
2742 
2743 	/* Enable Receives */
2744 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2745 
2746 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2747 	FOREACH_QUEUE(sc, que) {
2748 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2749 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2750 	}
2751 }
2752 
2753 /*********************************************************************
2754  *
2755  *  Free receive related data structures.
2756  *
2757  **********************************************************************/
2758 void
2759 em_free_receive_structures(struct em_softc *sc)
2760 {
2761 	struct em_queue *que;
2762 	struct em_packet *pkt;
2763 	int i;
2764 
2765 	INIT_DEBUGOUT("free_receive_structures: begin");
2766 
2767 	FOREACH_QUEUE(sc, que) {
2768 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2769 
2770 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2771 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2772 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2773 
2774 		if (que->rx.sc_rx_pkts_ring != NULL) {
2775 			for (i = 0; i < sc->sc_rx_slots; i++) {
2776 				pkt = &que->rx.sc_rx_pkts_ring[i];
2777 				if (pkt->pkt_m != NULL) {
2778 					bus_dmamap_sync(sc->sc_dmat,
2779 					    pkt->pkt_map,
2780 					    0, pkt->pkt_map->dm_mapsize,
2781 					    BUS_DMASYNC_POSTREAD);
2782 					bus_dmamap_unload(sc->sc_dmat,
2783 					    pkt->pkt_map);
2784 					m_freem(pkt->pkt_m);
2785 					pkt->pkt_m = NULL;
2786 				}
2787 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2788 			}
2789 
2790 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2791 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2792 			que->rx.sc_rx_pkts_ring = NULL;
2793 		}
2794 
2795 		if (que->rx.fmp != NULL) {
2796 			m_freem(que->rx.fmp);
2797 			que->rx.fmp = NULL;
2798 			que->rx.lmp = NULL;
2799 		}
2800 	}
2801 }
2802 
2803 int
2804 em_rxfill(struct em_queue *que)
2805 {
2806 	struct em_softc *sc = que->sc;
2807 	u_int slots;
2808 	int post = 0;
2809 	int i;
2810 
2811 	i = que->rx.sc_rx_desc_head;
2812 
2813 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2814 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2815 	    BUS_DMASYNC_POSTWRITE);
2816 
2817 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2818 	    slots > 0; slots--) {
2819 		if (++i == sc->sc_rx_slots)
2820 			i = 0;
2821 
2822 		if (em_get_buf(que, i) != 0)
2823 			break;
2824 
2825 		que->rx.sc_rx_desc_head = i;
2826 		post = 1;
2827 	}
2828 
2829 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2830 
2831 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2832 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2833 	    BUS_DMASYNC_PREWRITE);
2834 
2835 	return (post);
2836 }
2837 
2838 void
2839 em_rxrefill(void *arg)
2840 {
2841 	struct em_queue *que = arg;
2842 	struct em_softc *sc = que->sc;
2843 
2844 	if (em_rxfill(que))
2845 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2846 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2847 		timeout_add(&que->rx_refill, 1);
2848 }
2849 
2850 /*********************************************************************
2851  *
2852  *  This routine executes in interrupt context. It replenishes
2853  *  the mbufs in the descriptor and sends data which has been
2854  *  dma'ed into host memory to upper layer.
2855  *
2856  *********************************************************************/
2857 int
2858 em_rxeof(struct em_queue *que)
2859 {
2860 	struct em_softc	    *sc = que->sc;
2861 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2862 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2863 	struct mbuf	    *m;
2864 	u_int8_t	    accept_frame = 0;
2865 	u_int8_t	    eop = 0;
2866 	u_int16_t	    len, desc_len, prev_len_adj;
2867 	int		    i, rv = 0;
2868 
2869 	/* Pointer to the receive descriptor being examined. */
2870 	struct em_rx_desc   *desc;
2871 	struct em_packet    *pkt;
2872 	u_int8_t	    status;
2873 
2874 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2875 		return (0);
2876 
2877 	i = que->rx.sc_rx_desc_tail;
2878 
2879 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2880 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2881 	    BUS_DMASYNC_POSTREAD);
2882 
2883 	do {
2884 		m = NULL;
2885 
2886 		pkt = &que->rx.sc_rx_pkts_ring[i];
2887 		desc = &que->rx.sc_rx_desc_ring[i];
2888 
2889 		status = desc->status;
2890 		if (!ISSET(status, E1000_RXD_STAT_DD))
2891 			break;
2892 
2893 		/* pull the mbuf off the ring */
2894 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2895 		    0, pkt->pkt_map->dm_mapsize,
2896 		    BUS_DMASYNC_POSTREAD);
2897 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2898 		m = pkt->pkt_m;
2899 		pkt->pkt_m = NULL;
2900 
2901 		KASSERT(m != NULL);
2902 
2903 		if_rxr_put(&que->rx.sc_rx_ring, 1);
2904 		rv = 1;
2905 
2906 		accept_frame = 1;
2907 		prev_len_adj = 0;
2908 		desc_len = letoh16(desc->length);
2909 
2910 		if (status & E1000_RXD_STAT_EOP) {
2911 			eop = 1;
2912 			if (desc_len < ETHER_CRC_LEN) {
2913 				len = 0;
2914 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2915 			} else if (sc->hw.mac_type == em_i210 ||
2916 			    sc->hw.mac_type == em_i350)
2917 				len = desc_len;
2918 			else
2919 				len = desc_len - ETHER_CRC_LEN;
2920 		} else {
2921 			eop = 0;
2922 			len = desc_len;
2923 		}
2924 
2925 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2926 			u_int8_t last_byte;
2927 			u_int32_t pkt_len = desc_len;
2928 
2929 			if (que->rx.fmp != NULL)
2930 				pkt_len += que->rx.fmp->m_pkthdr.len;
2931 
2932 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
2933 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
2934 			    pkt_len, last_byte)) {
2935 #if NKSTAT > 0
2936 				em_tbi_adjust_stats(sc,
2937 				    pkt_len, sc->hw.mac_addr);
2938 #endif
2939 				if (len > 0)
2940 					len--;
2941 			} else
2942 				accept_frame = 0;
2943 		}
2944 
2945 		if (accept_frame) {
2946 			/* Assign correct length to the current fragment */
2947 			m->m_len = len;
2948 
2949 			if (que->rx.fmp == NULL) {
2950 				m->m_pkthdr.len = m->m_len;
2951 				que->rx.fmp = m;	 /* Store the first mbuf */
2952 				que->rx.lmp = m;
2953 			} else {
2954 				/* Chain mbuf's together */
2955 				m->m_flags &= ~M_PKTHDR;
2956 				/*
2957 				 * Adjust length of previous mbuf in chain if
2958 				 * we received less than 4 bytes in the last
2959 				 * descriptor.
2960 				 */
2961 				if (prev_len_adj > 0) {
2962 					que->rx.lmp->m_len -= prev_len_adj;
2963 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
2964 				}
2965 				que->rx.lmp->m_next = m;
2966 				que->rx.lmp = m;
2967 				que->rx.fmp->m_pkthdr.len += m->m_len;
2968 			}
2969 
2970 			if (eop) {
2971 				m = que->rx.fmp;
2972 
2973 				em_receive_checksum(sc, desc, m);
2974 #if NVLAN > 0
2975 				if (desc->status & E1000_RXD_STAT_VP) {
2976 					m->m_pkthdr.ether_vtag =
2977 					    letoh16(desc->special);
2978 					m->m_flags |= M_VLANTAG;
2979 				}
2980 #endif
2981 				ml_enqueue(&ml, m);
2982 
2983 				que->rx.fmp = NULL;
2984 				que->rx.lmp = NULL;
2985 			}
2986 		} else {
2987 			que->rx.dropped_pkts++;
2988 
2989 			if (que->rx.fmp != NULL) {
2990 				m_freem(que->rx.fmp);
2991 				que->rx.fmp = NULL;
2992 				que->rx.lmp = NULL;
2993 			}
2994 
2995 			m_freem(m);
2996 		}
2997 
2998 		/* Advance our pointers to the next descriptor. */
2999 		if (++i == sc->sc_rx_slots)
3000 			i = 0;
3001 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3002 
3003 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3004 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3005 	    BUS_DMASYNC_PREREAD);
3006 
3007 	que->rx.sc_rx_desc_tail = i;
3008 
3009 	if (ifiq_input(&ifp->if_rcv, &ml))
3010 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3011 
3012 	return (rv);
3013 }
3014 
3015 /*********************************************************************
3016  *
3017  *  Verify that the hardware indicated that the checksum is valid.
3018  *  Inform the stack about the status of checksum so that stack
3019  *  doesn't spend time verifying the checksum.
3020  *
3021  *********************************************************************/
3022 void
3023 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3024     struct mbuf *mp)
3025 {
3026 	/* 82543 or newer only */
3027 	if ((sc->hw.mac_type < em_82543) ||
3028 	    /* Ignore Checksum bit is set */
3029 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3030 		mp->m_pkthdr.csum_flags = 0;
3031 		return;
3032 	}
3033 
3034 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3035 		/* Did it pass? */
3036 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3037 			/* IP Checksum Good */
3038 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3039 
3040 		} else
3041 			mp->m_pkthdr.csum_flags = 0;
3042 	}
3043 
3044 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3045 		/* Did it pass? */
3046 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3047 			mp->m_pkthdr.csum_flags |=
3048 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3049 	}
3050 }
3051 
3052 /*
3053  * This turns on the hardware offload of the VLAN
3054  * tag insertion and strip
3055  */
3056 void
3057 em_enable_hw_vlans(struct em_softc *sc)
3058 {
3059 	uint32_t ctrl;
3060 
3061 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3062 	ctrl |= E1000_CTRL_VME;
3063 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3064 }
3065 
3066 void
3067 em_enable_intr(struct em_softc *sc)
3068 {
3069 	uint32_t mask;
3070 
3071 	if (sc->msix) {
3072 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3073 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3074 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3075 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3076 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3077 	} else
3078 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3079 }
3080 
3081 void
3082 em_disable_intr(struct em_softc *sc)
3083 {
3084 	/*
3085 	 * The first version of 82542 had an errata where when link
3086 	 * was forced it would stay up even if the cable was disconnected
3087 	 * Sequence errors were used to detect the disconnect and then
3088 	 * the driver would unforce the link.  This code is in the ISR.
3089 	 * For this to work correctly the Sequence error interrupt had
3090 	 * to be enabled all the time.
3091 	 */
3092 	if (sc->msix) {
3093 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3094 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3095 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3096 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3097 	else
3098 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3099 }
3100 
3101 void
3102 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3103 {
3104 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3105 	pcireg_t val;
3106 
3107 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3108 	if (reg & 0x2) {
3109 		val &= 0x0000ffff;
3110 		val |= (*value << 16);
3111 	} else {
3112 		val &= 0xffff0000;
3113 		val |= *value;
3114 	}
3115 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3116 }
3117 
3118 void
3119 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3120 {
3121 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3122 	pcireg_t val;
3123 
3124 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3125 	if (reg & 0x2)
3126 		*value = (val >> 16) & 0xffff;
3127 	else
3128 		*value = val & 0xffff;
3129 }
3130 
3131 void
3132 em_pci_set_mwi(struct em_hw *hw)
3133 {
3134 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3135 
3136 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3137 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3138 }
3139 
3140 void
3141 em_pci_clear_mwi(struct em_hw *hw)
3142 {
3143 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3144 
3145 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3146 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3147 }
3148 
3149 /*
3150  * We may eventually really do this, but its unnecessary
3151  * for now so we just return unsupported.
3152  */
3153 int32_t
3154 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3155 {
3156 	return -E1000_NOT_IMPLEMENTED;
3157 }
3158 
3159 /*********************************************************************
3160 * 82544 Coexistence issue workaround.
3161 *    There are 2 issues.
3162 *       1. Transmit Hang issue.
3163 *    To detect this issue, following equation can be used...
3164 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3165 *          If SUM[3:0] is in between 1 to 4, we will have this issue.
3166 *
3167 *       2. DAC issue.
3168 *    To detect this issue, following equation can be used...
3169 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3170 *          If SUM[3:0] is in between 9 to c, we will have this issue.
3171 *
3172 *
3173 *    WORKAROUND:
3174 *          Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c (DAC)
3175 *
3176 *** *********************************************************************/
3177 u_int32_t
3178 em_fill_descriptors(u_int64_t address, u_int32_t length,
3179     PDESC_ARRAY desc_array)
3180 {
3181         /* Since issue is sensitive to length and address.*/
3182         /* Let us first check the address...*/
3183         u_int32_t safe_terminator;
3184         if (length <= 4) {
3185                 desc_array->descriptor[0].address = address;
3186                 desc_array->descriptor[0].length = length;
3187                 desc_array->elements = 1;
3188                 return desc_array->elements;
3189         }
3190         safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3191         /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3192         if (safe_terminator == 0   ||
3193         (safe_terminator > 4   &&
3194         safe_terminator < 9)   ||
3195         (safe_terminator > 0xC &&
3196         safe_terminator <= 0xF)) {
3197                 desc_array->descriptor[0].address = address;
3198                 desc_array->descriptor[0].length = length;
3199                 desc_array->elements = 1;
3200                 return desc_array->elements;
3201         }
3202 
3203         desc_array->descriptor[0].address = address;
3204         desc_array->descriptor[0].length = length - 4;
3205         desc_array->descriptor[1].address = address + (length - 4);
3206         desc_array->descriptor[1].length = 4;
3207         desc_array->elements = 2;
3208         return desc_array->elements;
3209 }
3210 
3211 /*
3212  * Disable the L0S and L1 LINK states.
3213  */
3214 void
3215 em_disable_aspm(struct em_softc *sc)
3216 {
3217 	int offset;
3218 	pcireg_t val;
3219 
3220 	switch (sc->hw.mac_type) {
3221 		case em_82571:
3222 		case em_82572:
3223 		case em_82573:
3224 		case em_82574:
3225 			break;
3226 		default:
3227 			return;
3228 	}
3229 
3230 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3231 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3232 		return;
3233 
3234 	/* Disable PCIe Active State Power Management (ASPM). */
3235 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3236 	    offset + PCI_PCIE_LCSR);
3237 
3238 	switch (sc->hw.mac_type) {
3239 		case em_82571:
3240 		case em_82572:
3241 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3242 			break;
3243 		case em_82573:
3244 		case em_82574:
3245 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3246 			    PCI_PCIE_LCSR_ASPM_L1);
3247 			break;
3248 		default:
3249 			break;
3250 	}
3251 
3252 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3253 	    offset + PCI_PCIE_LCSR, val);
3254 }
3255 
3256 /*
3257  * em_flush_tx_ring - remove all descriptors from the tx_ring
3258  *
3259  * We want to clear all pending descriptors from the TX ring.
3260  * zeroing happens when the HW reads the regs. We assign the ring itself as
3261  * the data of the next descriptor. We don't care about the data we are about
3262  * to reset the HW.
3263  */
3264 void
3265 em_flush_tx_ring(struct em_queue *que)
3266 {
3267 	struct em_softc		*sc = que->sc;
3268 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3269 	uint16_t		 size = 512;
3270 	struct em_tx_desc	*txd;
3271 
3272 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3273 
3274 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3275 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3276 
3277 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3278 
3279 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3280 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3281 	txd->lower.data = htole32(txd_lower | size);
3282 	txd->upper.data = 0;
3283 
3284 	/* flush descriptors to memory before notifying the HW */
3285 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3286 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3287 
3288 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3289 		que->tx.sc_tx_desc_head = 0;
3290 
3291 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3292 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3293 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3294 	usec_delay(250);
3295 }
3296 
3297 /*
3298  * em_flush_rx_ring - remove all descriptors from the rx_ring
3299  *
3300  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3301  */
3302 void
3303 em_flush_rx_ring(struct em_queue *que)
3304 {
3305 	uint32_t	rctl, rxdctl;
3306 	struct em_softc	*sc = que->sc;
3307 
3308 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3309 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3310 	E1000_WRITE_FLUSH(&sc->hw);
3311 	usec_delay(150);
3312 
3313 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3314 	/* zero the lower 14 bits (prefetch and host thresholds) */
3315 	rxdctl &= 0xffffc000;
3316 	/*
3317 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3318 	 * and make sure the granularity is "descriptors" and not "cache lines"
3319 	 */
3320 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3321 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3322 
3323 	/* momentarily enable the RX ring for the changes to take effect */
3324 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3325 	E1000_WRITE_FLUSH(&sc->hw);
3326 	usec_delay(150);
3327 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3328 }
3329 
3330 /*
3331  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3332  *
3333  * In i219, the descriptor rings must be emptied before resetting the HW
3334  * or before changing the device state to D3 during runtime (runtime PM).
3335  *
3336  * Failure to do this will cause the HW to enter a unit hang state which can
3337  * only be released by PCI reset on the device
3338  *
3339  */
3340 void
3341 em_flush_desc_rings(struct em_softc *sc)
3342 {
3343 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3344 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3345 	uint32_t		 fextnvm11, tdlen;
3346 	uint16_t		 hang_state;
3347 
3348 	/* First, disable MULR fix in FEXTNVM11 */
3349 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3350 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3351 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3352 
3353 	/* do nothing if we're not in faulty state, or if the queue is empty */
3354 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3355 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3356 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3357 		return;
3358 	em_flush_tx_ring(que);
3359 
3360 	/* recheck, maybe the fault is caused by the rx ring */
3361 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3362 	if (hang_state & FLUSH_DESC_REQUIRED)
3363 		em_flush_rx_ring(que);
3364 }
3365 
3366 int
3367 em_allocate_legacy(struct em_softc *sc)
3368 {
3369 	pci_intr_handle_t	 ih;
3370 	const char		*intrstr = NULL;
3371 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3372 	pci_chipset_tag_t	 pc = pa->pa_pc;
3373 
3374 	if (pci_intr_map_msi(pa, &ih)) {
3375 		if (pci_intr_map(pa, &ih)) {
3376 			printf(": couldn't map interrupt\n");
3377 			return (ENXIO);
3378 		}
3379 		sc->legacy_irq = 1;
3380 	}
3381 
3382 	intrstr = pci_intr_string(pc, ih);
3383 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3384 	    em_intr, sc, DEVNAME(sc));
3385 	if (sc->sc_intrhand == NULL) {
3386 		printf(": couldn't establish interrupt");
3387 		if (intrstr != NULL)
3388 			printf(" at %s", intrstr);
3389 		printf("\n");
3390 		return (ENXIO);
3391 	}
3392 	printf(": %s", intrstr);
3393 
3394 	return (0);
3395 }
3396 
3397 #if NKSTAT > 0
3398 /* this is used to look up the array of kstats quickly */
3399 enum em_stat {
3400 	em_stat_crcerrs,
3401 	em_stat_algnerrc,
3402 	em_stat_symerrs,
3403 	em_stat_rxerrc,
3404 	em_stat_mpc,
3405 	em_stat_scc,
3406 	em_stat_ecol,
3407 	em_stat_mcc,
3408 	em_stat_latecol,
3409 	em_stat_colc,
3410 	em_stat_dc,
3411 	em_stat_tncrs,
3412 	em_stat_sec,
3413 	em_stat_cexterr,
3414 	em_stat_rlec,
3415 	em_stat_xonrxc,
3416 	em_stat_xontxc,
3417 	em_stat_xoffrxc,
3418 	em_stat_xofftxc,
3419 	em_stat_fcruc,
3420 	em_stat_prc64,
3421 	em_stat_prc127,
3422 	em_stat_prc255,
3423 	em_stat_prc511,
3424 	em_stat_prc1023,
3425 	em_stat_prc1522,
3426 	em_stat_gprc,
3427 	em_stat_bprc,
3428 	em_stat_mprc,
3429 	em_stat_gptc,
3430 	em_stat_gorc,
3431 	em_stat_gotc,
3432 	em_stat_rnbc,
3433 	em_stat_ruc,
3434 	em_stat_rfc,
3435 	em_stat_roc,
3436 	em_stat_rjc,
3437 	em_stat_mgtprc,
3438 	em_stat_mgtpdc,
3439 	em_stat_mgtptc,
3440 	em_stat_tor,
3441 	em_stat_tot,
3442 	em_stat_tpr,
3443 	em_stat_tpt,
3444 	em_stat_ptc64,
3445 	em_stat_ptc127,
3446 	em_stat_ptc255,
3447 	em_stat_ptc511,
3448 	em_stat_ptc1023,
3449 	em_stat_ptc1522,
3450 	em_stat_mptc,
3451 	em_stat_bptc,
3452 #if 0
3453 	em_stat_tsctc,
3454 	em_stat_tsctf,
3455 #endif
3456 
3457 	em_stat_count,
3458 };
3459 
3460 struct em_counter {
3461 	const char		*name;
3462 	enum kstat_kv_unit	 unit;
3463 	uint32_t		 reg;
3464 };
3465 
3466 static const struct em_counter em_counters[em_stat_count] = {
3467 	[em_stat_crcerrs] =
3468 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3469 	[em_stat_algnerrc] = /* >= em_82543 */
3470 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3471 	[em_stat_symerrs] = /* >= em_82543 */
3472 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3473 	[em_stat_rxerrc] =
3474 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3475 	[em_stat_mpc] =
3476 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3477 	[em_stat_scc] =
3478 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3479 	[em_stat_ecol] =
3480 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3481 	[em_stat_mcc] =
3482 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3483 	[em_stat_latecol] =
3484 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3485 	[em_stat_colc] =
3486 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3487 	[em_stat_dc] =
3488 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3489 	[em_stat_tncrs] = /* >= em_82543 */
3490 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3491 	[em_stat_sec] =
3492 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3493 	[em_stat_cexterr] = /* >= em_82543 */
3494 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3495 	[em_stat_rlec] =
3496 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3497 	[em_stat_xonrxc] =
3498 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3499 	[em_stat_xontxc] =
3500 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3501 	[em_stat_xoffrxc] =
3502 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3503 	[em_stat_xofftxc] =
3504 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3505 	[em_stat_fcruc] =
3506 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3507 	[em_stat_prc64] =
3508 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3509 	[em_stat_prc127] =
3510 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3511 	[em_stat_prc255] =
3512 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3513 	[em_stat_prc511] =
3514 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3515 	[em_stat_prc1023] =
3516 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3517 	[em_stat_prc1522] =
3518 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3519 	[em_stat_gprc] =
3520 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3521 	[em_stat_bprc] =
3522 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3523 	[em_stat_mprc] =
3524 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3525 	[em_stat_gptc] =
3526 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3527 	[em_stat_gorc] = /* 64bit */
3528 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3529 	[em_stat_gotc] = /* 64bit */
3530 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3531 	[em_stat_rnbc] =
3532 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3533 	[em_stat_ruc] =
3534 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3535 	[em_stat_rfc] =
3536 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3537 	[em_stat_roc] =
3538 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3539 	[em_stat_rjc] =
3540 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3541 	[em_stat_mgtprc] =
3542 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3543 	[em_stat_mgtpdc] =
3544 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3545 	[em_stat_mgtptc] =
3546 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3547 	[em_stat_tor] = /* 64bit */
3548 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3549 	[em_stat_tot] = /* 64bit */
3550 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3551 	[em_stat_tpr] =
3552 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3553 	[em_stat_tpt] =
3554 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3555 	[em_stat_ptc64] =
3556 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3557 	[em_stat_ptc127] =
3558 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3559 	[em_stat_ptc255] =
3560 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3561 	[em_stat_ptc511] =
3562 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3563 	[em_stat_ptc1023] =
3564 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3565 	[em_stat_ptc1522] =
3566 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3567 	[em_stat_mptc] =
3568 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3569 	[em_stat_bptc] =
3570 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3571 };
3572 
3573 /**********************************************************************
3574  *
3575  *  Update the board statistics counters.
3576  *
3577  **********************************************************************/
3578 int
3579 em_kstat_read(struct kstat *ks)
3580 {
3581 	struct em_softc *sc = ks->ks_softc;
3582 	struct em_hw *hw = &sc->hw;
3583 	struct kstat_kv *kvs = ks->ks_data;
3584 	uint32_t lo, hi;
3585 	unsigned int i;
3586 
3587 	for (i = 0; i < nitems(em_counters); i++) {
3588 		const struct em_counter *c = &em_counters[i];
3589 		if (c->reg == 0)
3590 			continue;
3591 
3592 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3593 		    E1000_REG_TR(hw, c->reg)); /* wtf */
3594 	}
3595 
3596 	/* Handle the exceptions. */
3597 
3598 	if (sc->hw.mac_type >= em_82543) {
3599 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3600 		    E1000_READ_REG(hw, ALGNERRC);
3601 		kstat_kv_u64(&kvs[em_stat_rxerrc]) +=
3602 		    E1000_READ_REG(hw, RXERRC);
3603 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3604 		    E1000_READ_REG(hw, CEXTERR);
3605 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3606 		    E1000_READ_REG(hw, TNCRS);
3607 #if 0
3608 		sc->stats.tsctc +=
3609 		E1000_READ_REG(hw, TSCTC);
3610 		sc->stats.tsctfc +=
3611 		E1000_READ_REG(hw, TSCTFC);
3612 #endif
3613 	}
3614 
3615 	/* For the 64-bit byte counters the low dword must be read first. */
3616 	/* Both registers clear on the read of the high dword */
3617 
3618 	lo = E1000_READ_REG(hw, GORCL);
3619 	hi = E1000_READ_REG(hw, GORCH);
3620 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3621 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3622 
3623 	lo = E1000_READ_REG(hw, GOTCL);
3624 	hi = E1000_READ_REG(hw, GOTCH);
3625 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3626 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3627 
3628 	lo = E1000_READ_REG(hw, TORL);
3629 	hi = E1000_READ_REG(hw, TORH);
3630 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3631 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3632 
3633 	lo = E1000_READ_REG(hw, TOTL);
3634 	hi = E1000_READ_REG(hw, TOTH);
3635 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3636 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3637 
3638 	getnanouptime(&ks->ks_updated);
3639 
3640 	return (0);
3641 }
3642 
3643 void
3644 em_kstat_attach(struct em_softc *sc)
3645 {
3646 	struct kstat *ks;
3647 	struct kstat_kv *kvs;
3648 	unsigned int i;
3649 
3650 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3651 
3652 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3653 	    KSTAT_T_KV, 0);
3654 	if (ks == NULL)
3655 		return;
3656 
3657 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3658 	    M_DEVBUF, M_WAITOK|M_ZERO);
3659 	for (i = 0; i < nitems(em_counters); i++) {
3660 		const struct em_counter *c = &em_counters[i];
3661 		kstat_kv_unit_init(&kvs[i], c->name,
3662 		    KSTAT_KV_T_COUNTER64, c->unit);
3663 	}
3664 
3665 	ks->ks_softc = sc;
3666 	ks->ks_data = kvs;
3667 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3668 	ks->ks_read = em_kstat_read;
3669 	kstat_set_mutex(ks, &sc->kstat_mtx);
3670 
3671 	kstat_install(ks);
3672 }
3673 
3674 /******************************************************************************
3675  * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3676  *****************************************************************************/
3677 void
3678 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3679 {
3680 	struct em_hw *hw = &sc->hw;
3681 	struct kstat *ks = sc->kstat;
3682 	struct kstat_kv *kvs;
3683 
3684 	if (ks == NULL)
3685 		return;
3686 
3687 	/* First adjust the frame length. */
3688 	frame_len--;
3689 
3690 	mtx_enter(&sc->kstat_mtx);
3691 	kvs = ks->ks_data;
3692 
3693 	/*
3694 	 * We need to adjust the statistics counters, since the hardware
3695 	 * counters overcount this packet as a CRC error and undercount the
3696 	 * packet as a good packet
3697 	 */
3698 
3699 	/* This packet should not be counted as a CRC error.	*/
3700 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3701 	/* This packet does count as a Good Packet Received.	*/
3702 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3703 
3704 	/* Adjust the Good Octets received counters		*/
3705 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3706 
3707 	/*
3708 	 * Is this a broadcast or multicast?  Check broadcast first, since
3709 	 * the test for a multicast frame will test positive on a broadcast
3710 	 * frame.
3711 	 */
3712 	if (ETHER_IS_BROADCAST(mac_addr)) {
3713 		/* Broadcast packet */
3714 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3715 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3716 		/* Multicast packet */
3717 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3718 	}
3719 
3720 	if (frame_len == hw->max_frame_size) {
3721 		/*
3722 		 * In this case, the hardware has overcounted the number of
3723 		 * oversize frames.
3724 		 */
3725 		kstat_kv_u64(&kvs[em_stat_roc])--;
3726 	}
3727 
3728 	/*
3729 	 * Adjust the bin counters when the extra byte put the frame in the
3730 	 * wrong bin. Remember that the frame_len was adjusted above.
3731 	 */
3732 	if (frame_len == 64) {
3733 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3734 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3735 	} else if (frame_len == 127) {
3736 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3737 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3738 	} else if (frame_len == 255) {
3739 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3740 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3741 	} else if (frame_len == 511) {
3742 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3743 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3744 	} else if (frame_len == 1023) {
3745 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3746 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3747 	} else if (frame_len == 1522) {
3748 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3749 	}
3750 
3751 	mtx_leave(&sc->kstat_mtx);
3752 }
3753 #endif /* NKSTAT > 0 */
3754 
3755 #ifndef SMALL_KERNEL
3756 int
3757 em_allocate_msix(struct em_softc *sc)
3758 {
3759 	pci_intr_handle_t	 ih;
3760 	const char		*intrstr = NULL;
3761 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3762 	pci_chipset_tag_t	 pc = pa->pa_pc;
3763 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3764 	int			 vec;
3765 
3766 	if (!em_enable_msix)
3767 		return (ENODEV);
3768 
3769 	switch (sc->hw.mac_type) {
3770 	case em_82576:
3771 	case em_82580:
3772 	case em_i350:
3773 	case em_i210:
3774 		break;
3775 	default:
3776 		return (ENODEV);
3777 	}
3778 
3779 	vec = 0;
3780 	if (pci_intr_map_msix(pa, vec, &ih))
3781 		return (ENODEV);
3782 	sc->msix = 1;
3783 
3784 	que->me = vec;
3785 	que->eims = 1 << vec;
3786 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3787 
3788 	intrstr = pci_intr_string(pc, ih);
3789 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3790 	    em_queue_intr_msix, que, que->name);
3791 	if (que->tag == NULL) {
3792 		printf(": couldn't establish interrupt");
3793 		if (intrstr != NULL)
3794 			printf(" at %s", intrstr);
3795 		printf("\n");
3796 		return (ENXIO);
3797 	}
3798 
3799 	/* Setup linkvector, use last queue vector + 1 */
3800 	vec++;
3801 	sc->msix_linkvec = vec;
3802 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3803 		printf(": couldn't map link vector\n");
3804 		return (ENXIO);
3805 	}
3806 
3807 	intrstr = pci_intr_string(pc, ih);
3808 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3809 	    em_link_intr_msix, sc, DEVNAME(sc));
3810 	if (sc->sc_intrhand == NULL) {
3811 		printf(": couldn't establish interrupt");
3812 		if (intrstr != NULL)
3813 			printf(" at %s", intrstr);
3814 		printf("\n");
3815 		return (ENXIO);
3816 	}
3817 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3818 
3819 	return (0);
3820 }
3821 
3822 /*
3823  * Interrupt for a specific queue, (not link interrupts). The EICR bit which
3824  * maps to the EIMS bit expresses both RX and TX, therefore we can't
3825  * distringuish if this is a RX completion of TX completion and must do both.
3826  * The bits in EICR are autocleared and we _cannot_ read EICR.
3827  */
3828 int
3829 em_queue_intr_msix(void *vque)
3830 {
3831 	struct em_queue *que = vque;
3832 	struct em_softc *sc = que->sc;
3833 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3834 
3835 	if (ifp->if_flags & IFF_RUNNING) {
3836 		em_txeof(que);
3837 		if (em_rxeof(que))
3838 			em_rxrefill(que);
3839 	}
3840 
3841 	em_enable_queue_intr_msix(que);
3842 
3843 	return (1);
3844 }
3845 
3846 int
3847 em_link_intr_msix(void *arg)
3848 {
3849 	struct em_softc *sc = arg;
3850 	uint32_t icr;
3851 
3852 	icr = E1000_READ_REG(&sc->hw, ICR);
3853 
3854 	/* Link status change */
3855 	if (icr & E1000_ICR_LSC) {
3856 		KERNEL_LOCK();
3857 		sc->hw.get_link_status = 1;
3858 		em_check_for_link(&sc->hw);
3859 		em_update_link_status(sc);
3860 		KERNEL_UNLOCK();
3861 	}
3862 
3863 	/* Re-arm unconditionally */
3864 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3865 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3866 
3867 	return (1);
3868 }
3869 
3870 /*
3871  * Maps queues into msix interrupt vectors.
3872  */
3873 int
3874 em_setup_queues_msix(struct em_softc *sc)
3875 {
3876 	uint32_t ivar, newitr, index;
3877 	struct em_queue *que;
3878 
3879 	KASSERT(sc->msix);
3880 
3881 	/* First turn on RSS capability */
3882 	if (sc->hw.mac_type != em_82575)
3883 		E1000_WRITE_REG(&sc->hw, GPIE,
3884 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3885 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3886 
3887 	/* Turn on MSIX */
3888 	switch (sc->hw.mac_type) {
3889 	case em_82580:
3890 	case em_i350:
3891 	case em_i210:
3892 		/* RX entries */
3893 		/*
3894 		 * Note, this maps Queues into MSIX vectors, it works fine.
3895 		 * The funky calculation of offsets and checking if que->me is
3896 		 * odd is due to the weird register distribution, the datasheet
3897 		 * explains it well.
3898 		 */
3899 		FOREACH_QUEUE(sc, que) {
3900 			index = que->me >> 1;
3901 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3902 			if (que->me & 1) {
3903 				ivar &= 0xFF00FFFF;
3904 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3905 			} else {
3906 				ivar &= 0xFFFFFF00;
3907 				ivar |= que->me | E1000_IVAR_VALID;
3908 			}
3909 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3910 		}
3911 
3912 		/* TX entries */
3913 		FOREACH_QUEUE(sc, que) {
3914 			index = que->me >> 1;
3915 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3916 			if (que->me & 1) {
3917 				ivar &= 0x00FFFFFF;
3918 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3919 			} else {
3920 				ivar &= 0xFFFF00FF;
3921 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3922 			}
3923 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3924 			sc->msix_queuesmask |= que->eims;
3925 		}
3926 
3927 		/* And for the link interrupt */
3928 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3929 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3930 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3931 		break;
3932 	case em_82576:
3933 		/* RX entries */
3934 		FOREACH_QUEUE(sc, que) {
3935 			index = que->me & 0x7; /* Each IVAR has two entries */
3936 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3937 			if (que->me < 8) {
3938 				ivar &= 0xFFFFFF00;
3939 				ivar |= que->me | E1000_IVAR_VALID;
3940 			} else {
3941 				ivar &= 0xFF00FFFF;
3942 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3943 			}
3944 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3945 			sc->msix_queuesmask |= que->eims;
3946 		}
3947 		/* TX entries */
3948 		FOREACH_QUEUE(sc, que) {
3949 			index = que->me & 0x7; /* Each IVAR has two entries */
3950 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3951 			if (que->me < 8) {
3952 				ivar &= 0xFFFF00FF;
3953 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3954 			} else {
3955 				ivar &= 0x00FFFFFF;
3956 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3957 			}
3958 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3959 			sc->msix_queuesmask |= que->eims;
3960 		}
3961 
3962 		/* And for the link interrupt */
3963 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3964 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3965 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3966 		break;
3967 	default:
3968 		panic("unsupported mac");
3969 		break;
3970 	}
3971 
3972 	/* Set the starting interrupt rate */
3973 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
3974 
3975 	if (sc->hw.mac_type == em_82575)
3976 		newitr |= newitr << 16;
3977 	else
3978 		newitr |= E1000_EITR_CNT_IGNR;
3979 
3980 	FOREACH_QUEUE(sc, que)
3981 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
3982 
3983 	return (0);
3984 }
3985 
3986 void
3987 em_enable_queue_intr_msix(struct em_queue *que)
3988 {
3989 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
3990 }
3991 #endif /* !SMALL_KERNEL */
3992 
3993 int
3994 em_allocate_desc_rings(struct em_softc *sc)
3995 {
3996 	struct em_queue *que;
3997 
3998 	FOREACH_QUEUE(sc, que) {
3999 		/* Allocate Transmit Descriptor ring */
4000 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4001 		    &que->tx.sc_tx_dma) != 0) {
4002 			printf("%s: Unable to allocate tx_desc memory\n",
4003 			    DEVNAME(sc));
4004 			return (ENOMEM);
4005 		}
4006 		que->tx.sc_tx_desc_ring =
4007 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4008 
4009 		/* Allocate Receive Descriptor ring */
4010 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4011 		    &que->rx.sc_rx_dma) != 0) {
4012 			printf("%s: Unable to allocate rx_desc memory\n",
4013 			    DEVNAME(sc));
4014 			return (ENOMEM);
4015 		}
4016 		que->rx.sc_rx_desc_ring =
4017 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4018 	}
4019 
4020 	return (0);
4021 }
4022