xref: /openbsd-src/sys/dev/pci/if_em.c (revision c020cf82e0cc147236f01a8dca7052034cf9d30d)
1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.354 2020/06/22 02:31:32 dlg Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 /*********************************************************************
41  *  Driver version
42  *********************************************************************/
43 
44 #define EM_DRIVER_VERSION	"6.2.9"
45 
46 /*********************************************************************
47  *  PCI Device ID Table
48  *********************************************************************/
49 const struct pci_matchid em_devices[] = {
50 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
225 };
226 
227 /*********************************************************************
228  *  Function prototypes
229  *********************************************************************/
230 int  em_probe(struct device *, void *, void *);
231 void em_attach(struct device *, struct device *, void *);
232 void em_defer_attach(struct device*);
233 int  em_detach(struct device *, int);
234 int  em_activate(struct device *, int);
235 int  em_intr(void *);
236 int  em_allocate_legacy(struct em_softc *);
237 void em_start(struct ifqueue *);
238 int  em_ioctl(struct ifnet *, u_long, caddr_t);
239 void em_watchdog(struct ifnet *);
240 void em_init(void *);
241 void em_stop(void *, int);
242 void em_media_status(struct ifnet *, struct ifmediareq *);
243 int  em_media_change(struct ifnet *);
244 uint64_t  em_flowstatus(struct em_softc *);
245 void em_identify_hardware(struct em_softc *);
246 int  em_allocate_pci_resources(struct em_softc *);
247 void em_free_pci_resources(struct em_softc *);
248 void em_local_timer(void *);
249 int  em_hardware_init(struct em_softc *);
250 void em_setup_interface(struct em_softc *);
251 int  em_setup_transmit_structures(struct em_softc *);
252 void em_initialize_transmit_unit(struct em_softc *);
253 int  em_setup_receive_structures(struct em_softc *);
254 void em_initialize_receive_unit(struct em_softc *);
255 void em_enable_intr(struct em_softc *);
256 void em_disable_intr(struct em_softc *);
257 void em_free_transmit_structures(struct em_softc *);
258 void em_free_receive_structures(struct em_softc *);
259 void em_update_stats_counters(struct em_softc *);
260 void em_disable_aspm(struct em_softc *);
261 void em_txeof(struct em_queue *);
262 int  em_allocate_receive_structures(struct em_softc *);
263 int  em_allocate_transmit_structures(struct em_softc *);
264 int  em_allocate_desc_rings(struct em_softc *);
265 int  em_rxfill(struct em_queue *);
266 void em_rxrefill(void *);
267 int  em_rxeof(struct em_queue *);
268 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
269 			 struct mbuf *);
270 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
271 	    u_int32_t *, u_int32_t *);
272 void em_iff(struct em_softc *);
273 #ifdef EM_DEBUG
274 void em_print_hw_stats(struct em_softc *);
275 #endif
276 void em_update_link_status(struct em_softc *);
277 int  em_get_buf(struct em_queue *, int);
278 void em_enable_hw_vlans(struct em_softc *);
279 u_int em_encap(struct em_queue *, struct mbuf *);
280 void em_smartspeed(struct em_softc *);
281 int  em_82547_fifo_workaround(struct em_softc *, int);
282 void em_82547_update_fifo_head(struct em_softc *, int);
283 int  em_82547_tx_fifo_reset(struct em_softc *);
284 void em_82547_move_tail(void *arg);
285 void em_82547_move_tail_locked(struct em_softc *);
286 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
287 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
288 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
289 			      PDESC_ARRAY desc_array);
290 void em_flush_tx_ring(struct em_queue *);
291 void em_flush_rx_ring(struct em_queue *);
292 void em_flush_desc_rings(struct em_softc *);
293 
294 #ifndef SMALL_KERNEL
295 /* MSIX/Multiqueue functions */
296 int  em_allocate_msix(struct em_softc *);
297 int  em_setup_queues_msix(struct em_softc *);
298 int  em_queue_intr_msix(void *);
299 int  em_link_intr_msix(void *);
300 void em_enable_queue_intr_msix(struct em_queue *);
301 #else
302 #define em_allocate_msix(_sc) 	(-1)
303 #endif
304 
305 /*********************************************************************
306  *  OpenBSD Device Interface Entry Points
307  *********************************************************************/
308 
309 struct cfattach em_ca = {
310 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
311 	em_activate
312 };
313 
314 struct cfdriver em_cd = {
315 	NULL, "em", DV_IFNET
316 };
317 
318 static int em_smart_pwr_down = FALSE;
319 int em_enable_msix = 0;
320 
321 /*********************************************************************
322  *  Device identification routine
323  *
324  *  em_probe determines if the driver should be loaded on the
325  *  adapter based on the PCI vendor/device id of the adapter.
326  *
327  *  return 0 on no match, positive on match
328  *********************************************************************/
329 
330 int
331 em_probe(struct device *parent, void *match, void *aux)
332 {
333 	INIT_DEBUGOUT("em_probe: begin");
334 
335 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
336 	    nitems(em_devices)));
337 }
338 
339 void
340 em_defer_attach(struct device *self)
341 {
342 	struct em_softc *sc = (struct em_softc *)self;
343 	struct pci_attach_args *pa = &sc->osdep.em_pa;
344 	pci_chipset_tag_t	pc = pa->pa_pc;
345 	void *gcu;
346 
347 	INIT_DEBUGOUT("em_defer_attach: begin");
348 
349 	if ((gcu = em_lookup_gcu(self)) == 0) {
350 		printf("%s: No GCU found, defered attachment failed\n",
351 		    DEVNAME(sc));
352 
353 		if (sc->sc_intrhand)
354 			pci_intr_disestablish(pc, sc->sc_intrhand);
355 		sc->sc_intrhand = 0;
356 
357 		em_stop(sc, 1);
358 
359 		em_free_pci_resources(sc);
360 
361 		return;
362 	}
363 
364 	sc->hw.gcu = gcu;
365 
366 	em_attach_miibus(self);
367 
368 	em_setup_interface(sc);
369 
370 	em_setup_link(&sc->hw);
371 
372 	em_update_link_status(sc);
373 }
374 
375 /*********************************************************************
376  *  Device initialization routine
377  *
378  *  The attach entry point is called when the driver is being loaded.
379  *  This routine identifies the type of hardware, allocates all resources
380  *  and initializes the hardware.
381  *
382  *********************************************************************/
383 
384 void
385 em_attach(struct device *parent, struct device *self, void *aux)
386 {
387 	struct pci_attach_args *pa = aux;
388 	struct em_softc *sc;
389 	int defer = 0;
390 
391 	INIT_DEBUGOUT("em_attach: begin");
392 
393 	sc = (struct em_softc *)self;
394 	sc->sc_dmat = pa->pa_dmat;
395 	sc->osdep.em_pa = *pa;
396 
397 	timeout_set(&sc->timer_handle, em_local_timer, sc);
398 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
399 
400 	/* Determine hardware revision */
401 	em_identify_hardware(sc);
402 
403 	/*
404 	 * Only use MSI on the newer PCIe parts, with the exception
405 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
406 	 */
407 	if (sc->hw.mac_type <= em_82572)
408 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
409 
410 	/* Parameters (to be read from user) */
411 	if (sc->hw.mac_type >= em_82544) {
412 		sc->sc_tx_slots = EM_MAX_TXD;
413 		sc->sc_rx_slots = EM_MAX_RXD;
414 	} else {
415 		sc->sc_tx_slots = EM_MAX_TXD_82543;
416 		sc->sc_rx_slots = EM_MAX_RXD_82543;
417 	}
418 	sc->tx_int_delay = EM_TIDV;
419 	sc->tx_abs_int_delay = EM_TADV;
420 	sc->rx_int_delay = EM_RDTR;
421 	sc->rx_abs_int_delay = EM_RADV;
422 	sc->hw.autoneg = DO_AUTO_NEG;
423 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
424 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
425 	sc->hw.tbi_compatibility_en = TRUE;
426 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
427 
428 	sc->hw.phy_init_script = 1;
429 	sc->hw.phy_reset_disable = FALSE;
430 
431 #ifndef EM_MASTER_SLAVE
432 	sc->hw.master_slave = em_ms_hw_default;
433 #else
434 	sc->hw.master_slave = EM_MASTER_SLAVE;
435 #endif
436 
437 	/*
438 	 * This controls when hardware reports transmit completion
439 	 * status.
440 	 */
441 	sc->hw.report_tx_early = 1;
442 
443 	if (em_allocate_pci_resources(sc))
444 		goto err_pci;
445 
446 	/* Initialize eeprom parameters */
447 	em_init_eeprom_params(&sc->hw);
448 
449 	/*
450 	 * Set the max frame size assuming standard Ethernet
451 	 * sized frames.
452 	 */
453 	switch (sc->hw.mac_type) {
454 		case em_82573:
455 		{
456 			uint16_t	eeprom_data = 0;
457 
458 			/*
459 			 * 82573 only supports Jumbo frames
460 			 * if ASPM is disabled.
461 			 */
462 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
463 			    1, &eeprom_data);
464 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
465 				sc->hw.max_frame_size = ETHER_MAX_LEN;
466 				break;
467 			}
468 			/* Allow Jumbo frames */
469 			/* FALLTHROUGH */
470 		}
471 		case em_82571:
472 		case em_82572:
473 		case em_82574:
474 		case em_82575:
475 		case em_82576:
476 		case em_82580:
477 		case em_i210:
478 		case em_i350:
479 		case em_ich9lan:
480 		case em_ich10lan:
481 		case em_pch2lan:
482 		case em_pch_lpt:
483 		case em_pch_spt:
484 		case em_pch_cnp:
485 		case em_80003es2lan:
486 			/* 9K Jumbo Frame size */
487 			sc->hw.max_frame_size = 9234;
488 			break;
489 		case em_pchlan:
490 			sc->hw.max_frame_size = 4096;
491 			break;
492 		case em_82542_rev2_0:
493 		case em_82542_rev2_1:
494 		case em_ich8lan:
495 			/* Adapters that do not support Jumbo frames */
496 			sc->hw.max_frame_size = ETHER_MAX_LEN;
497 			break;
498 		default:
499 			sc->hw.max_frame_size =
500 			    MAX_JUMBO_FRAME_SIZE;
501 	}
502 
503 	sc->hw.min_frame_size =
504 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
505 
506 	if (em_allocate_desc_rings(sc) != 0) {
507 		printf("%s: Unable to allocate descriptor ring memory\n",
508 		    DEVNAME(sc));
509 		goto err_pci;
510 	}
511 
512 	/* Initialize the hardware */
513 	if ((defer = em_hardware_init(sc))) {
514 		if (defer == EAGAIN)
515 			config_defer(self, em_defer_attach);
516 		else {
517 			printf("%s: Unable to initialize the hardware\n",
518 			    DEVNAME(sc));
519 			goto err_pci;
520 		}
521 	}
522 
523 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
524 	    sc->hw.mac_type == em_82576 ||
525 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
526 	    sc->hw.mac_type == em_i350) {
527 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
528 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
529 		    E1000_STATUS_FUNC_SHIFT;
530 
531 		switch (sc->hw.bus_func) {
532 		case 0:
533 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
534 			break;
535 		case 1:
536 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
537 			break;
538 		case 2:
539 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
540 			break;
541 		case 3:
542 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
543 			break;
544 		}
545 	} else {
546 		sc->hw.bus_func = 0;
547 	}
548 
549 	/* Copy the permanent MAC address out of the EEPROM */
550 	if (em_read_mac_addr(&sc->hw) < 0) {
551 		printf("%s: EEPROM read error while reading mac address\n",
552 		       DEVNAME(sc));
553 		goto err_pci;
554 	}
555 
556 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
557 
558 	/* Setup OS specific network interface */
559 	if (!defer)
560 		em_setup_interface(sc);
561 
562 	/* Initialize statistics */
563 	em_clear_hw_cntrs(&sc->hw);
564 #ifndef SMALL_KERNEL
565 	em_update_stats_counters(sc);
566 #endif
567 	sc->hw.get_link_status = 1;
568 	if (!defer)
569 		em_update_link_status(sc);
570 
571 #ifdef EM_DEBUG
572 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
573 #endif
574 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
575 
576 	/* Indicate SOL/IDER usage */
577 	if (em_check_phy_reset_block(&sc->hw))
578 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
579 		    DEVNAME(sc));
580 
581 	/* Identify 82544 on PCI-X */
582 	em_get_bus_info(&sc->hw);
583 	if (sc->hw.bus_type == em_bus_type_pcix &&
584 	    sc->hw.mac_type == em_82544)
585 		sc->pcix_82544 = TRUE;
586 	else
587 		sc->pcix_82544 = FALSE;
588 
589 	sc->hw.icp_xxxx_is_link_up = FALSE;
590 
591 	INIT_DEBUGOUT("em_attach: end");
592 	return;
593 
594 err_pci:
595 	em_free_pci_resources(sc);
596 }
597 
598 /*********************************************************************
599  *  Transmit entry point
600  *
601  *  em_start is called by the stack to initiate a transmit.
602  *  The driver will remain in this routine as long as there are
603  *  packets to transmit and transmit resources are available.
604  *  If resources are not available, the stack is notified and
605  *  the packet is requeued.
606  **********************************************************************/
607 
608 void
609 em_start(struct ifqueue *ifq)
610 {
611 	struct ifnet *ifp = ifq->ifq_if;
612 	struct em_softc *sc = ifp->if_softc;
613 	u_int head, free, used;
614 	struct mbuf *m;
615 	int post = 0;
616 	struct em_queue *que = sc->queues; /* Use only first queue. */
617 
618 	if (!sc->link_active) {
619 		ifq_purge(ifq);
620 		return;
621 	}
622 
623 	/* calculate free space */
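	/*
	 * head is where the driver will place the next descriptor and
	 * tail is the index em_txeof() has cleaned up to, so the number
	 * of free slots is (tail - head) modulo the ring size; when the
	 * two are equal the ring is treated as empty.
	 */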
624 	head = que->tx.sc_tx_desc_head;
625 	free = que->tx.sc_tx_desc_tail;
626 	if (free <= head)
627 		free += sc->sc_tx_slots;
628 	free -= head;
629 
630 	if (sc->hw.mac_type != em_82547) {
631 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
632 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
633 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
634 	}
635 
636 	for (;;) {
637 		/* use 2 because cksum setup can use an extra slot */
638 		if (EM_MAX_SCATTER + 2 > free) {
639 			ifq_set_oactive(ifq);
640 			break;
641 		}
642 
643 		m = ifq_dequeue(ifq);
644 		if (m == NULL)
645 			break;
646 
647 		used = em_encap(que, m);
648 		if (used == 0) {
649 			m_freem(m);
650 			continue;
651 		}
652 
653 		KASSERT(used <= free);
654 
655 		free -= used;
656 
657 #if NBPFILTER > 0
658 		/* Send a copy of the frame to the BPF listener */
659 		if (ifp->if_bpf)
660 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
661 #endif
662 
663 		/* Set timeout in case hardware has problems transmitting */
664 		ifp->if_timer = EM_TX_TIMEOUT;
665 
666 		if (sc->hw.mac_type == em_82547) {
667 			int len = m->m_pkthdr.len;
668 
669 			if (sc->link_duplex == HALF_DUPLEX)
670 				em_82547_move_tail_locked(sc);
671 			else {
672 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
673 				    que->tx.sc_tx_desc_head);
674 				em_82547_update_fifo_head(sc, len);
675 			}
676 		}
677 
678 		post = 1;
679 	}
680 
681 	if (sc->hw.mac_type != em_82547) {
682 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
683 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
684 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
685 		/*
686 		 * Advance the Transmit Descriptor Tail (TDT);
687 		 * this tells the E1000 that this frame is
688 		 * available to transmit.
689 		 */
690 		if (post)
691 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
692 			    que->tx.sc_tx_desc_head);
693 	}
694 }
695 
696 /*********************************************************************
697  *  Ioctl entry point
698  *
699  *  em_ioctl is called when the user wants to configure the
700  *  interface.
701  *
702  *  return 0 on success, positive on failure
703  **********************************************************************/
704 
705 int
706 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
707 {
708 	int		error = 0;
709 	struct ifreq   *ifr = (struct ifreq *) data;
710 	struct em_softc *sc = ifp->if_softc;
711 	int s;
712 
713 	s = splnet();
714 
715 	switch (command) {
716 	case SIOCSIFADDR:
717 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
718 			       "Addr)");
719 		if (!(ifp->if_flags & IFF_UP)) {
720 			ifp->if_flags |= IFF_UP;
721 			em_init(sc);
722 		}
723 		break;
724 
725 	case SIOCSIFFLAGS:
726 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
727 		if (ifp->if_flags & IFF_UP) {
728 			if (ifp->if_flags & IFF_RUNNING)
729 				error = ENETRESET;
730 			else
731 				em_init(sc);
732 		} else {
733 			if (ifp->if_flags & IFF_RUNNING)
734 				em_stop(sc, 0);
735 		}
736 		break;
737 
738 	case SIOCSIFMEDIA:
739 		/* Check SOL/IDER usage */
740 		if (em_check_phy_reset_block(&sc->hw)) {
741 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
742 			    DEVNAME(sc));
743 			break;
744 		}
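		/* FALLTHROUGH */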
745 	case SIOCGIFMEDIA:
746 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
747 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
748 		break;
749 
750 	case SIOCGIFRXR:
751 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
752 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
753 		break;
754 
755 	default:
756 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
757 	}
758 
759 	if (error == ENETRESET) {
760 		if (ifp->if_flags & IFF_RUNNING) {
761 			em_disable_intr(sc);
762 			em_iff(sc);
763 			if (sc->hw.mac_type == em_82542_rev2_0)
764 				em_initialize_receive_unit(sc);
765 			em_enable_intr(sc);
766 		}
767 		error = 0;
768 	}
769 
770 	splx(s);
771 	return (error);
772 }
773 
774 /*********************************************************************
775  *  Watchdog entry point
776  *
777  *  This routine is called whenever hardware quits transmitting.
778  *
779  **********************************************************************/
780 
781 void
782 em_watchdog(struct ifnet *ifp)
783 {
784 	struct em_softc *sc = ifp->if_softc;
785 	struct em_queue *que = sc->queues; /* Use only first queue. */
786 
787 
788 	/* If we are in this routine because of pause frames, then
789 	 * don't reset the hardware.
790 	 */
791 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
792 		ifp->if_timer = EM_TX_TIMEOUT;
793 		return;
794 	}
795 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
796 	    DEVNAME(sc),
797 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
798 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
799 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
800 
801 	em_init(sc);
802 
803 	sc->watchdog_events++;
804 }
805 
806 /*********************************************************************
807  *  Init entry point
808  *
809  *  This routine is used in two ways. It is used by the stack as
810  *  init entry point in network interface structure. It is also used
811  *  by the driver as a hw/sw initialization routine to get to a
812  *  consistent state.
813  *
814  **********************************************************************/
815 
816 void
817 em_init(void *arg)
818 {
819 	struct em_softc *sc = arg;
820 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
821 	uint32_t	pba;
822 	int s;
823 
824 	s = splnet();
825 
826 	INIT_DEBUGOUT("em_init: begin");
827 
828 	em_stop(sc, 0);
829 
830 	/*
831 	 * Packet Buffer Allocation (PBA)
832 	 * Writing PBA sets the receive portion of the buffer;
833 	 * the remainder is used for the transmit buffer.
834 	 *
835 	 * Devices before the 82547 had a Packet Buffer of 64K.
836 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
837 	 * On the 82547 the buffer was reduced to 40K.
838 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
839 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
840 	 */
841 	switch (sc->hw.mac_type) {
842 	case em_82547:
843 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
844 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
845 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
846 		else
847 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
848 		sc->tx_fifo_head = 0;
849 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
850 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
851 		break;
852 	case em_82571:
853 	case em_82572: /* Total Packet Buffer on these is 48k */
854 	case em_82575:
855 	case em_82576:
856 	case em_82580:
857 	case em_80003es2lan:
858 	case em_i350:
859 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
860 		break;
861 	case em_i210:
862 		pba = E1000_PBA_34K;
863 		break;
864 	case em_82573: /* 82573: Total Packet Buffer is 32K */
865 		/* Jumbo frames not supported */
866 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
867 		break;
868 	case em_82574: /* Total Packet Buffer is 40k */
869 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
870 		break;
871 	case em_ich8lan:
872 		pba = E1000_PBA_8K;
873 		break;
874 	case em_ich9lan:
875 	case em_ich10lan:
876 		/* Boost Receive side for jumbo frames */
877 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
878 			pba = E1000_PBA_14K;
879 		else
880 			pba = E1000_PBA_10K;
881 		break;
882 	case em_pchlan:
883 	case em_pch2lan:
884 	case em_pch_lpt:
885 	case em_pch_spt:
886 	case em_pch_cnp:
887 		pba = E1000_PBA_26K;
888 		break;
889 	default:
890 		/* Devices before 82547 had a Packet Buffer of 64K.   */
891 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
892 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
893 		else
894 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
895 	}
896 	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
897 	E1000_WRITE_REG(&sc->hw, PBA, pba);
898 
899 	/* Get the latest mac address, User can use a LAA */
900 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
901 
902 	/* Initialize the hardware */
903 	if (em_hardware_init(sc)) {
904 		printf("%s: Unable to initialize the hardware\n",
905 		       DEVNAME(sc));
906 		splx(s);
907 		return;
908 	}
909 	em_update_link_status(sc);
910 
911 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
912 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
913 		em_enable_hw_vlans(sc);
914 
915 	/* Prepare transmit descriptors and buffers */
916 	if (em_setup_transmit_structures(sc)) {
917 		printf("%s: Could not setup transmit structures\n",
918 		       DEVNAME(sc));
919 		em_stop(sc, 0);
920 		splx(s);
921 		return;
922 	}
923 	em_initialize_transmit_unit(sc);
924 
925 	/* Prepare receive descriptors and buffers */
926 	if (em_setup_receive_structures(sc)) {
927 		printf("%s: Could not setup receive structures\n",
928 		       DEVNAME(sc));
929 		em_stop(sc, 0);
930 		splx(s);
931 		return;
932 	}
933 	em_initialize_receive_unit(sc);
934 
935 #ifndef SMALL_KERNEL
936 	if (sc->msix) {
937 		if (em_setup_queues_msix(sc)) {
938 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
939 			splx(s);
940 			return;
941 		}
942 	}
943 #endif
944 
945 	/* Program promiscuous mode and multicast filters. */
946 	em_iff(sc);
947 
948 	ifp->if_flags |= IFF_RUNNING;
949 	ifq_clr_oactive(&ifp->if_snd);
950 
951 	timeout_add_sec(&sc->timer_handle, 1);
952 	em_clear_hw_cntrs(&sc->hw);
953 	em_enable_intr(sc);
954 
955 	/* Don't reset the phy next time init gets called */
956 	sc->hw.phy_reset_disable = TRUE;
957 
958 	splx(s);
959 }
960 
961 /*********************************************************************
962  *
963  *  Interrupt Service routine
964  *
965  **********************************************************************/
966 int
967 em_intr(void *arg)
968 {
969 	struct em_softc	*sc = arg;
970 	struct em_queue *que = sc->queues; /* single queue */
971 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
972 	u_int32_t	reg_icr, test_icr;
973 
974 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
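	/*
	 * On 82571 and newer parts ICR sets E1000_ICR_INT_ASSERTED only
	 * when this device actually raised the interrupt, so a read
	 * without that bit is treated as not ours (the legacy interrupt
	 * line may be shared with other devices).
	 */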
975 	if (sc->hw.mac_type >= em_82571)
976 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
977 	if (!test_icr)
978 		return (0);
979 
980 	if (ifp->if_flags & IFF_RUNNING) {
981 		em_txeof(que);
982 		if (em_rxeof(que))
983 			em_rxrefill(que);
984 	}
985 
986 	/* Link status change */
987 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
988 		KERNEL_LOCK();
989 		sc->hw.get_link_status = 1;
990 		em_check_for_link(&sc->hw);
991 		em_update_link_status(sc);
992 		KERNEL_UNLOCK();
993 	}
994 
995 	return (1);
996 }
997 
998 /*********************************************************************
999  *
1000  *  Media Ioctl callback
1001  *
1002  *  This routine is called whenever the user queries the status of
1003  *  the interface using ifconfig.
1004  *
1005  **********************************************************************/
1006 void
1007 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1008 {
1009 	struct em_softc *sc = ifp->if_softc;
1010 	uint64_t fiber_type = IFM_1000_SX;
1011 	u_int16_t gsr;
1012 
1013 	INIT_DEBUGOUT("em_media_status: begin");
1014 
1015 	em_check_for_link(&sc->hw);
1016 	em_update_link_status(sc);
1017 
1018 	ifmr->ifm_status = IFM_AVALID;
1019 	ifmr->ifm_active = IFM_ETHER;
1020 
1021 	if (!sc->link_active) {
1022 		ifmr->ifm_active |= IFM_NONE;
1023 		return;
1024 	}
1025 
1026 	ifmr->ifm_status |= IFM_ACTIVE;
1027 
1028 	if (sc->hw.media_type == em_media_type_fiber ||
1029 	    sc->hw.media_type == em_media_type_internal_serdes) {
1030 		if (sc->hw.mac_type == em_82545)
1031 			fiber_type = IFM_1000_LX;
1032 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1033 	} else {
1034 		switch (sc->link_speed) {
1035 		case 10:
1036 			ifmr->ifm_active |= IFM_10_T;
1037 			break;
1038 		case 100:
1039 			ifmr->ifm_active |= IFM_100_TX;
1040 			break;
1041 		case 1000:
1042 			ifmr->ifm_active |= IFM_1000_T;
1043 			break;
1044 		}
1045 
1046 		if (sc->link_duplex == FULL_DUPLEX)
1047 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1048 		else
1049 			ifmr->ifm_active |= IFM_HDX;
1050 
1051 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1052 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1053 			if (gsr & SR_1000T_MS_CONFIG_RES)
1054 				ifmr->ifm_active |= IFM_ETH_MASTER;
1055 		}
1056 	}
1057 }
1058 
1059 /*********************************************************************
1060  *
1061  *  Media Ioctl callback
1062  *
1063  *  This routine is called when the user changes speed/duplex using
1064  *  the media/mediaopt option with ifconfig.
1065  *
1066  **********************************************************************/
1067 int
1068 em_media_change(struct ifnet *ifp)
1069 {
1070 	struct em_softc *sc = ifp->if_softc;
1071 	struct ifmedia	*ifm = &sc->media;
1072 
1073 	INIT_DEBUGOUT("em_media_change: begin");
1074 
1075 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1076 		return (EINVAL);
1077 
1078 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1079 	case IFM_AUTO:
1080 		sc->hw.autoneg = DO_AUTO_NEG;
1081 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1082 		break;
1083 	case IFM_1000_LX:
1084 	case IFM_1000_SX:
1085 	case IFM_1000_T:
1086 		sc->hw.autoneg = DO_AUTO_NEG;
1087 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1088 		break;
1089 	case IFM_100_TX:
1090 		sc->hw.autoneg = FALSE;
1091 		sc->hw.autoneg_advertised = 0;
1092 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1093 			sc->hw.forced_speed_duplex = em_100_full;
1094 		else
1095 			sc->hw.forced_speed_duplex = em_100_half;
1096 		break;
1097 	case IFM_10_T:
1098 		sc->hw.autoneg = FALSE;
1099 		sc->hw.autoneg_advertised = 0;
1100 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1101 			sc->hw.forced_speed_duplex = em_10_full;
1102 		else
1103 			sc->hw.forced_speed_duplex = em_10_half;
1104 		break;
1105 	default:
1106 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1107 	}
1108 
1109 	/*
1110 	 * As the speed/duplex settings may have changed we need to
1111 	 * reset the PHY.
1112 	 */
1113 	sc->hw.phy_reset_disable = FALSE;
1114 
1115 	em_init(sc);
1116 
1117 	return (0);
1118 }
1119 
1120 uint64_t
1121 em_flowstatus(struct em_softc *sc)
1122 {
1123 	u_int16_t ar, lpar;
1124 
1125 	if (sc->hw.media_type == em_media_type_fiber ||
1126 	    sc->hw.media_type == em_media_type_internal_serdes)
1127 		return (0);
1128 
1129 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1130 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1131 
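	/*
	 * Resolve the negotiated pause configuration from the local and
	 * link partner advertisements, following the usual 802.3
	 * resolution: symmetric pause when both sides advertise PAUSE,
	 * otherwise the ASM_DIR bits decide whether only transmit or
	 * only receive pause is in effect.
	 */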
1132 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1133 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1134 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1135 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1136 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1137 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1138 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1139 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1140 
1141 	return (0);
1142 }
1143 
1144 /*********************************************************************
1145  *
1146  *  This routine maps the mbufs to tx descriptors.
1147  *
1148  *  Returns the number of descriptors used, or 0 on failure.
1149  **********************************************************************/
1150 u_int
1151 em_encap(struct em_queue *que, struct mbuf *m)
1152 {
1153 	struct em_softc *sc = que->sc;
1154 	struct em_packet *pkt;
1155 	struct em_tx_desc *desc;
1156 	bus_dmamap_t map;
1157 	u_int32_t txd_upper, txd_lower;
1158 	u_int head, last, used = 0;
1159 	int i, j;
1160 
1161 	/* For 82544 Workaround */
1162 	DESC_ARRAY		desc_array;
1163 	u_int32_t		array_elements;
1164 
1165 	/* get a dmamap for this packet from the next free slot */
1166 	head = que->tx.sc_tx_desc_head;
1167 	pkt = &que->tx.sc_tx_pkts_ring[head];
1168 	map = pkt->pkt_map;
1169 
1170 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1171 	case 0:
1172 		break;
1173 	case EFBIG:
1174 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1175 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1176 		     BUS_DMA_NOWAIT) == 0)
1177 			break;
1178 
1179 		/* FALLTHROUGH */
1180 	default:
1181 		sc->no_tx_dma_setup++;
1182 		return (0);
1183 	}
1184 
1185 	bus_dmamap_sync(sc->sc_dmat, map,
1186 	    0, map->dm_mapsize,
1187 	    BUS_DMASYNC_PREWRITE);
1188 
1189 	if (sc->hw.mac_type == em_82547) {
1190 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1191 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1192 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1193 	}
1194 
1195 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1196 	    sc->hw.mac_type != em_82576 &&
1197 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1198 	    sc->hw.mac_type != em_i350) {
1199 		used += em_transmit_checksum_setup(que, m, head,
1200 		    &txd_upper, &txd_lower);
1201 	} else {
1202 		txd_upper = txd_lower = 0;
1203 	}
1204 
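	/* Skip over any slots consumed by the checksum context descriptor. */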
1205 	head += used;
1206 	if (head >= sc->sc_tx_slots)
1207 		head -= sc->sc_tx_slots;
1208 
1209 	for (i = 0; i < map->dm_nsegs; i++) {
1210 		/* If sc is 82544 and on PCI-X bus */
1211 		if (sc->pcix_82544) {
1212 			/*
1213 			 * Check the Address and Length combination and
1214 			 * split the data accordingly
1215 			 */
1216 			array_elements = em_fill_descriptors(
1217 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1218 			    &desc_array);
1219 			for (j = 0; j < array_elements; j++) {
1220 				desc = &que->tx.sc_tx_desc_ring[head];
1221 
1222 				desc->buffer_addr = htole64(
1223 					desc_array.descriptor[j].address);
1224 				desc->lower.data = htole32(
1225 					(que->tx.sc_txd_cmd | txd_lower |
1226 					 (u_int16_t)desc_array.descriptor[j].length));
1227 				desc->upper.data = htole32(txd_upper);
1228 
1229 				last = head;
1230 				if (++head == sc->sc_tx_slots)
1231 					head = 0;
1232 
1233 				used++;
1234 			}
1235 		} else {
1236 			desc = &que->tx.sc_tx_desc_ring[head];
1237 
1238 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1239 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1240 			    txd_lower | map->dm_segs[i].ds_len);
1241 			desc->upper.data = htole32(txd_upper);
1242 
1243 			last = head;
1244 			if (++head == sc->sc_tx_slots)
1245 				head = 0;
1246 
1247 			used++;
1248 		}
1249 	}
1250 
1251 #if NVLAN > 0
1252 	/* Find out if we are in VLAN mode */
1253 	if (m->m_flags & M_VLANTAG) {
1254 		/* Set the VLAN id */
1255 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1256 
1257 		/* Tell hardware to add tag */
1258 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1259 	}
1260 #endif
1261 
1262 	/* mark the packet with the mbuf and last desc slot */
1263 	pkt->pkt_m = m;
1264 	pkt->pkt_eop = last;
1265 
1266 	que->tx.sc_tx_desc_head = head;
1267 
1268 	/*
1269 	 * Last Descriptor of Packet
1270 	 * needs End Of Packet (EOP)
1271 	 * and Report Status (RS)
1272 	 */
1273 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1274 
1275 	if (sc->hw.mac_type == em_82547) {
1276 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1277 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1278 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1279 	}
1280 
1281 	return (used);
1282 }
1283 
1284 /*********************************************************************
1285  *
1286  * 82547 workaround to avoid controller hang in half-duplex environment.
1287  * The workaround is to avoid queuing a large packet that would span
1288  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1289  * in this case. We do that only when FIFO is quiescent.
1290  *
1291  **********************************************************************/
1292 void
1293 em_82547_move_tail_locked(struct em_softc *sc)
1294 {
1295 	uint16_t hw_tdt;
1296 	uint16_t sw_tdt;
1297 	struct em_tx_desc *tx_desc;
1298 	uint16_t length = 0;
1299 	boolean_t eop = 0;
1300 	struct em_queue *que = sc->queues; /* single queue chip */
1301 
1302 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1303 	sw_tdt = que->tx.sc_tx_desc_head;
1304 
1305 	while (hw_tdt != sw_tdt) {
1306 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1307 		length += tx_desc->lower.flags.length;
1308 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1309 		if (++hw_tdt == sc->sc_tx_slots)
1310 			hw_tdt = 0;
1311 
1312 		if (eop) {
1313 			if (em_82547_fifo_workaround(sc, length)) {
1314 				sc->tx_fifo_wrk_cnt++;
1315 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1316 				break;
1317 			}
1318 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1319 			em_82547_update_fifo_head(sc, length);
1320 			length = 0;
1321 		}
1322 	}
1323 }
1324 
1325 void
1326 em_82547_move_tail(void *arg)
1327 {
1328 	struct em_softc *sc = arg;
1329 	int s;
1330 
1331 	s = splnet();
1332 	em_82547_move_tail_locked(sc);
1333 	splx(s);
1334 }
1335 
1336 int
1337 em_82547_fifo_workaround(struct em_softc *sc, int len)
1338 {
1339 	int fifo_space, fifo_pkt_len;
1340 
1341 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1342 
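	/*
	 * In half duplex, when the rounded-up packet would overrun the
	 * remaining FIFO space (with some threshold slack), try to reset
	 * the FIFO pointers; returning 1 makes the caller defer the send
	 * and retry via the FIFO timer.
	 */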
1343 	if (sc->link_duplex == HALF_DUPLEX) {
1344 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1345 
1346 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1347 			if (em_82547_tx_fifo_reset(sc))
1348 				return (0);
1349 			else
1350 				return (1);
1351 		}
1352 	}
1353 
1354 	return (0);
1355 }
1356 
1357 void
1358 em_82547_update_fifo_head(struct em_softc *sc, int len)
1359 {
1360 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1361 
1362 	/* tx_fifo_head is always 16 byte aligned */
1363 	sc->tx_fifo_head += fifo_pkt_len;
1364 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1365 		sc->tx_fifo_head -= sc->tx_fifo_size;
1366 }
1367 
1368 int
1369 em_82547_tx_fifo_reset(struct em_softc *sc)
1370 {
1371 	uint32_t tctl;
1372 	struct em_queue *que = sc->queues; /* single queue chip */
1373 
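	/*
	 * Resetting is only done while the adapter is idle: the
	 * descriptor ring is empty (TDT == TDH) and the internal Tx data
	 * FIFO head/tail and saved pointers agree with no packets
	 * pending (TDFPC == 0).
	 */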
1374 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1375 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1376 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1377 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1378 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1379 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1380 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1381 
1382 		/* Disable TX unit */
1383 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1384 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1385 
1386 		/* Reset FIFO pointers */
1387 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1388 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1389 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1390 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1391 
1392 		/* Re-enable TX unit */
1393 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1394 		E1000_WRITE_FLUSH(&sc->hw);
1395 
1396 		sc->tx_fifo_head = 0;
1397 		sc->tx_fifo_reset_cnt++;
1398 
1399 		return (TRUE);
1400 	} else
1401 		return (FALSE);
1402 }
1403 
1404 void
1405 em_iff(struct em_softc *sc)
1406 {
1407 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1408 	struct arpcom *ac = &sc->sc_ac;
1409 	u_int32_t reg_rctl = 0;
1410 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1411 	struct ether_multi *enm;
1412 	struct ether_multistep step;
1413 	int i = 0;
1414 
1415 	IOCTL_DEBUGOUT("em_iff: begin");
1416 
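	/*
	 * The 82542 rev 2.0 is reprogrammed with the receiver held in
	 * reset (and MWI disabled); the reset is released again after
	 * the filter update below.
	 */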
1417 	if (sc->hw.mac_type == em_82542_rev2_0) {
1418 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1419 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1420 			em_pci_clear_mwi(&sc->hw);
1421 		reg_rctl |= E1000_RCTL_RST;
1422 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1423 		msec_delay(5);
1424 	}
1425 
1426 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1427 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1428 	ifp->if_flags &= ~IFF_ALLMULTI;
1429 
1430 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1431 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1432 		ifp->if_flags |= IFF_ALLMULTI;
1433 		reg_rctl |= E1000_RCTL_MPE;
1434 		if (ifp->if_flags & IFF_PROMISC)
1435 			reg_rctl |= E1000_RCTL_UPE;
1436 	} else {
1437 		ETHER_FIRST_MULTI(step, ac, enm);
1438 		while (enm != NULL) {
1439 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1440 			i += ETH_LENGTH_OF_ADDRESS;
1441 
1442 			ETHER_NEXT_MULTI(step, enm);
1443 		}
1444 
1445 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1446 	}
1447 
1448 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1449 
1450 	if (sc->hw.mac_type == em_82542_rev2_0) {
1451 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1452 		reg_rctl &= ~E1000_RCTL_RST;
1453 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1454 		msec_delay(5);
1455 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1456 			em_pci_set_mwi(&sc->hw);
1457 	}
1458 }
1459 
1460 /*********************************************************************
1461  *  Timer routine
1462  *
1463  *  This routine checks for link status and updates statistics.
1464  *
1465  **********************************************************************/
1466 
1467 void
1468 em_local_timer(void *arg)
1469 {
1470 	struct ifnet   *ifp;
1471 	struct em_softc *sc = arg;
1472 	int s;
1473 
1474 	ifp = &sc->sc_ac.ac_if;
1475 
1476 	s = splnet();
1477 
1478 #ifndef SMALL_KERNEL
1479 	em_update_stats_counters(sc);
1480 #ifdef EM_DEBUG
1481 	if (ifp->if_flags & IFF_DEBUG && ifp->if_flags & IFF_RUNNING)
1482 		em_print_hw_stats(sc);
1483 #endif
1484 #endif
1485 	em_smartspeed(sc);
1486 
1487 	timeout_add_sec(&sc->timer_handle, 1);
1488 
1489 	splx(s);
1490 }
1491 
1492 void
1493 em_update_link_status(struct em_softc *sc)
1494 {
1495 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1496 	u_char link_state;
1497 
1498 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1499 		if (sc->link_active == 0) {
1500 			em_get_speed_and_duplex(&sc->hw,
1501 						&sc->link_speed,
1502 						&sc->link_duplex);
1503 			/* Check if we may set SPEED_MODE bit on PCI-E */
1504 			if ((sc->link_speed == SPEED_1000) &&
1505 			    ((sc->hw.mac_type == em_82571) ||
1506 			    (sc->hw.mac_type == em_82572) ||
1507 			    (sc->hw.mac_type == em_82575) ||
1508 			    (sc->hw.mac_type == em_82576) ||
1509 			    (sc->hw.mac_type == em_82580))) {
1510 				int tarc0;
1511 
1512 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1513 				tarc0 |= SPEED_MODE_BIT;
1514 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1515 			}
1516 			sc->link_active = 1;
1517 			sc->smartspeed = 0;
1518 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1519 		}
1520 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1521 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1522 	} else {
1523 		if (sc->link_active == 1) {
1524 			ifp->if_baudrate = sc->link_speed = 0;
1525 			sc->link_duplex = 0;
1526 			sc->link_active = 0;
1527 		}
1528 		link_state = LINK_STATE_DOWN;
1529 	}
1530 	if (ifp->if_link_state != link_state) {
1531 		ifp->if_link_state = link_state;
1532 		if_link_state_change(ifp);
1533 	}
1534 }
1535 
1536 /*********************************************************************
1537  *
1538  *  This routine disables all traffic on the adapter by issuing a
1539  *  global reset on the MAC and deallocates TX/RX buffers.
1540  *
1541  **********************************************************************/
1542 
1543 void
1544 em_stop(void *arg, int softonly)
1545 {
1546 	struct em_softc *sc = arg;
1547 	struct em_queue *que = sc->queues; /* Use only first queue. */
1548 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1549 
1550 	/* Tell the stack that the interface is no longer active */
1551 	ifp->if_flags &= ~IFF_RUNNING;
1552 
1553 	INIT_DEBUGOUT("em_stop: begin");
1554 
1555 	timeout_del(&que->rx_refill);
1556 	timeout_del(&sc->timer_handle);
1557 	timeout_del(&sc->tx_fifo_timer_handle);
1558 
1559 	if (!softonly)
1560 		em_disable_intr(sc);
1561 	if (sc->hw.mac_type >= em_pch_spt)
1562 		em_flush_desc_rings(sc);
1563 	if (!softonly)
1564 		em_reset_hw(&sc->hw);
1565 
1566 	intr_barrier(sc->sc_intrhand);
1567 	ifq_barrier(&ifp->if_snd);
1568 
1569 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1570 
1571 	ifq_clr_oactive(&ifp->if_snd);
1572 	ifp->if_timer = 0;
1573 
1574 	em_free_transmit_structures(sc);
1575 	em_free_receive_structures(sc);
1576 }
1577 
1578 /*********************************************************************
1579  *
1580  *  Determine hardware revision.
1581  *
1582  **********************************************************************/
1583 void
1584 em_identify_hardware(struct em_softc *sc)
1585 {
1586 	u_int32_t reg;
1587 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1588 
1589 	/* Make sure our PCI config space has the necessary stuff set */
1590 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1591 					    PCI_COMMAND_STATUS_REG);
1592 
1593 	/* Save off the information about this board */
1594 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1595 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1596 
1597 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1598 	sc->hw.revision_id = PCI_REVISION(reg);
1599 
1600 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1601 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1602 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1603 
1604 	/* Identify the MAC */
1605 	if (em_set_mac_type(&sc->hw))
1606 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1607 
1608 	if (sc->hw.mac_type == em_pchlan)
1609 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1610 
1611 	if (sc->hw.mac_type == em_82541 ||
1612 	    sc->hw.mac_type == em_82541_rev_2 ||
1613 	    sc->hw.mac_type == em_82547 ||
1614 	    sc->hw.mac_type == em_82547_rev_2)
1615 		sc->hw.phy_init_script = TRUE;
1616 }
1617 
1618 void
1619 em_legacy_irq_quirk_spt(struct em_softc *sc)
1620 {
1621 	uint32_t	reg;
1622 
1623 	/* Legacy interrupt: SPT needs a quirk. */
1624 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp)
1625 		return;
1626 	if (sc->legacy_irq == 0)
1627 		return;
1628 
1629 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1630 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1631 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1632 
1633 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1634 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1635 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1636 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1637 }
1638 
1639 int
1640 em_allocate_pci_resources(struct em_softc *sc)
1641 {
1642 	int		val, rid;
1643 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1644 	struct em_queue	       *que = NULL;
1645 
1646 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1647 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1648 		printf(": mmba is not mem space\n");
1649 		return (ENXIO);
1650 	}
1651 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1652 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1653 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1654 		printf(": cannot find mem space\n");
1655 		return (ENXIO);
1656 	}
1657 
1658 	switch (sc->hw.mac_type) {
1659 	case em_82544:
1660 	case em_82540:
1661 	case em_82545:
1662 	case em_82546:
1663 	case em_82541:
1664 	case em_82541_rev_2:
1665 		/* Figure out where our I/O BAR is */
1666 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1667 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1668 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1669 				sc->io_rid = rid;
1670 				break;
1671 			}
1672 			rid += 4;
1673 			if (PCI_MAPREG_MEM_TYPE(val) ==
1674 			    PCI_MAPREG_MEM_TYPE_64BIT)
1675 				rid += 4;	/* skip high bits, too */
1676 		}
1677 
1678 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1679 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1680 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1681 			printf(": cannot find i/o space\n");
1682 			return (ENXIO);
1683 		}
1684 
1685 		sc->hw.io_base = 0;
1686 		break;
1687 	default:
1688 		break;
1689 	}
1690 
1691 	sc->osdep.em_flashoffset = 0;
1692 	/* For the ICH8 family we need to find the flash memory. */
1693 	if (sc->hw.mac_type >= em_pch_spt) {
1694 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1695 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1696 		sc->osdep.em_flashbase = 0;
1697 		sc->osdep.em_flashsize = 0;
1698 		sc->osdep.em_flashoffset = 0xe000;
1699 	} else if (IS_ICH8(sc->hw.mac_type)) {
1700 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1701 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1702 			printf(": flash is not mem space\n");
1703 			return (ENXIO);
1704 		}
1705 
1706 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1707 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1708 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1709 			printf(": cannot find mem space\n");
1710 			return (ENXIO);
1711 		}
1712 	}
1713 
1714 	sc->osdep.dev = (struct device *)sc;
1715 	sc->hw.back = &sc->osdep;
1716 
1717 	/* Only one queue for the moment. */
1718 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1719 	if (que == NULL) {
1720 		printf(": unable to allocate queue memory\n");
1721 		return (ENOMEM);
1722 	}
1723 	que->me = 0;
1724 	que->sc = sc;
1725 	timeout_set(&que->rx_refill, em_rxrefill, que);
1726 
1727 	sc->queues = que;
1728 	sc->num_queues = 1;
1729 	sc->msix = 0;
1730 	sc->legacy_irq = 0;
1731 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1732 		return (ENXIO);
1733 
1734 	/*
1735 	 * The ICP_xxxx device has multiple, duplicate register sets that are
1736 	 * used when it operates as a network processor.  Disable those
1737 	 * registers here, as they are not necessary in this context and
1738 	 * can confuse the system.
1739 	 */
1740 	if(sc->hw.mac_type == em_icp_xxxx) {
1741 		int offset;
1742 		pcireg_t val;
1743 
1744 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1745 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1746 			return (0);
1747 		}
1748 		offset += PCI_ST_SMIA_OFFSET;
1749 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1750 		    offset, 0x06);
1751 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1752 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1753 	}
1754 	return (0);
1755 }
1756 
1757 void
1758 em_free_pci_resources(struct em_softc *sc)
1759 {
1760 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1761 	pci_chipset_tag_t	pc = pa->pa_pc;
1762 	struct em_queue	       *que = NULL;
1763 	if (sc->sc_intrhand)
1764 		pci_intr_disestablish(pc, sc->sc_intrhand);
1765 	sc->sc_intrhand = 0;
1766 
1767 	if (sc->osdep.em_flashbase)
1768 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1769 				sc->osdep.em_flashsize);
1770 	sc->osdep.em_flashbase = 0;
1771 
1772 	if (sc->osdep.em_iobase)
1773 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1774 				sc->osdep.em_iosize);
1775 	sc->osdep.em_iobase = 0;
1776 
1777 	if (sc->osdep.em_membase)
1778 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1779 				sc->osdep.em_memsize);
1780 	sc->osdep.em_membase = 0;
1781 
1782 	FOREACH_QUEUE(sc, que) {
1783 		if (que->rx.sc_rx_desc_ring != NULL) {
1784 			que->rx.sc_rx_desc_ring = NULL;
1785 			em_dma_free(sc, &que->rx.sc_rx_dma);
1786 		}
1787 		if (que->tx.sc_tx_desc_ring != NULL) {
1788 			que->tx.sc_tx_desc_ring = NULL;
1789 			em_dma_free(sc, &que->tx.sc_tx_dma);
1790 		}
1791 		if (que->tag)
1792 			pci_intr_disestablish(pc, que->tag);
1793 		que->tag = NULL;
1794 		que->eims = 0;
1795 		que->me = 0;
1796 		que->sc = NULL;
1797 	}
1798 	sc->legacy_irq = 0;
1799 	sc->msix_linkvec = 0;
1800 	sc->msix_queuesmask = 0;
1801 	if (sc->queues)
1802 		free(sc->queues, M_DEVBUF,
1803 		    sc->num_queues * sizeof(struct em_queue));
1804 	sc->num_queues = 0;
1805 	sc->queues = NULL;
1806 }
1807 
1808 /*********************************************************************
1809  *
1810  *  Initialize the hardware to a configuration as specified by the
1811  *  em_softc structure. The controller is reset, the EEPROM is
1812  *  verified, the MAC address is set, then the shared initialization
1813  *  routines are called.
1814  *
1815  **********************************************************************/
1816 int
1817 em_hardware_init(struct em_softc *sc)
1818 {
1819 	uint32_t ret_val;
1820 	u_int16_t rx_buffer_size;
1821 
1822 	INIT_DEBUGOUT("em_hardware_init: begin");
1823 	if (sc->hw.mac_type >= em_pch_spt)
1824 		em_flush_desc_rings(sc);
1825 	/* Issue a global reset */
1826 	em_reset_hw(&sc->hw);
1827 
1828 	/* When hardware is reset, fifo_head is also reset */
1829 	sc->tx_fifo_head = 0;
1830 
1831 	/* Make sure we have a good EEPROM before we read from it */
1832 	if (em_get_flash_presence_i210(&sc->hw) &&
1833 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1834 		/*
1835 		 * Some PCIe parts fail the first check due to
1836 		 * the link being in sleep state; call it again, and
1837 		 * if it fails a second time it's a real issue.
1838 		 */
1839 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1840 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1841 			       DEVNAME(sc));
1842 			return (EIO);
1843 		}
1844 	}
1845 
1846 	if (em_get_flash_presence_i210(&sc->hw) &&
1847 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1848 		printf("%s: EEPROM read error while reading part number\n",
1849 		       DEVNAME(sc));
1850 		return (EIO);
1851 	}
1852 
1853 	/* Default smart power down to off on newer adapters */
1854 	if (!em_smart_pwr_down &&
1855 	     (sc->hw.mac_type == em_82571 ||
1856 	      sc->hw.mac_type == em_82572 ||
1857 	      sc->hw.mac_type == em_82575 ||
1858 	      sc->hw.mac_type == em_82576 ||
1859 	      sc->hw.mac_type == em_82580 ||
1860 	      sc->hw.mac_type == em_i210 ||
1861 	      sc->hw.mac_type == em_i350 )) {
1862 		uint16_t phy_tmp = 0;
1863 
1864 		/* Speed up time to link by disabling smart power down */
1865 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1866 		phy_tmp &= ~IGP02E1000_PM_SPD;
1867 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1868 	}
1869 
1870 	em_legacy_irq_quirk_spt(sc);
1871 
1872 	/*
1873 	 * These parameters control the automatic generation (Tx) and
1874 	 * response (Rx) to Ethernet PAUSE frames.
1875 	 * - High water mark should allow for at least two frames to be
1876 	 *   received after sending an XOFF.
1877 	 * - Low water mark works best when it is very near the high water mark.
1878 	 *   This allows the receiver to restart by sending XON when it has
1879 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1880 	 *   restart after one full frame is pulled from the buffer.  There
1881 	 *   could be several smaller frames in the buffer and if so they will
1882 	 *   not trigger the XON until their total number reduces the buffer
1883 	 *   by 1500.
1884 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1885 	 */
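	/*
	 * Illustrative arithmetic (assumed values, not taken from any part's
	 * datasheet): a PBA low word of 0x30 gives rx_buffer_size =
	 * 0x30 << 10 = 49152 bytes; with a 1518-byte max frame rounded up to
	 * a 1024-byte multiple, fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */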
1886 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1887 
1888 	sc->hw.fc_high_water = rx_buffer_size -
1889 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1890 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1891 	if (sc->hw.mac_type == em_80003es2lan)
1892 		sc->hw.fc_pause_time = 0xFFFF;
1893 	else
1894 		sc->hw.fc_pause_time = 1000;
1895 	sc->hw.fc_send_xon = TRUE;
1896 	sc->hw.fc = E1000_FC_FULL;
1897 
1898 	em_disable_aspm(sc);
1899 
1900 	if ((ret_val = em_init_hw(sc)) != 0) {
1901 		if (ret_val == E1000_DEFER_INIT) {
1902 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1903 			return (EAGAIN);
1904 		}
1905 		printf("\n%s: Hardware Initialization Failed: %d\n",
1906 		       DEVNAME(sc), ret_val);
1907 		return (EIO);
1908 	}
1909 
1910 	em_check_for_link(&sc->hw);
1911 
1912 	return (0);
1913 }
1914 
1915 /*********************************************************************
1916  *
1917  *  Setup networking device structure and register an interface.
1918  *
1919  **********************************************************************/
1920 void
1921 em_setup_interface(struct em_softc *sc)
1922 {
1923 	struct ifnet   *ifp;
1924 	uint64_t fiber_type = IFM_1000_SX;
1925 
1926 	INIT_DEBUGOUT("em_setup_interface: begin");
1927 
1928 	ifp = &sc->sc_ac.ac_if;
1929 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1930 	ifp->if_softc = sc;
1931 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1932 	ifp->if_xflags = IFXF_MPSAFE;
1933 	ifp->if_ioctl = em_ioctl;
1934 	ifp->if_qstart = em_start;
1935 	ifp->if_watchdog = em_watchdog;
1936 	ifp->if_hardmtu =
1937 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1938 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->sc_tx_slots - 1);
1939 
1940 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1941 
1942 #if NVLAN > 0
1943 	if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
1944 	    sc->hw.mac_type != em_82576 &&
1945 	    sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
1946 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1947 #endif
1948 
1949 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1950 	    sc->hw.mac_type != em_82576 &&
1951 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1952 	    sc->hw.mac_type != em_i350)
1953 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1954 
1955 	/*
1956 	 * Specify the media types supported by this adapter and register
1957 	 * callbacks to update media and link information
1958 	 */
1959 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1960 		     em_media_status);
1961 	if (sc->hw.media_type == em_media_type_fiber ||
1962 	    sc->hw.media_type == em_media_type_internal_serdes) {
1963 		if (sc->hw.mac_type == em_82545)
1964 			fiber_type = IFM_1000_LX;
1965 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1966 			    0, NULL);
1967 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1968 			    0, NULL);
1969 	} else {
1970 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1971 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1972 			    0, NULL);
1973 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
1974 			    0, NULL);
1975 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1976 			    0, NULL);
1977 		if (sc->hw.phy_type != em_phy_ife) {
1978 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1979 				    0, NULL);
1980 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1981 		}
1982 	}
1983 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1984 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1985 
1986 	if_attach(ifp);
1987 	ether_ifattach(ifp);
1988 	em_enable_intr(sc);
1989 }
1990 
1991 int
1992 em_detach(struct device *self, int flags)
1993 {
1994 	struct em_softc *sc = (struct em_softc *)self;
1995 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1996 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1997 	pci_chipset_tag_t	pc = pa->pa_pc;
1998 
1999 	if (sc->sc_intrhand)
2000 		pci_intr_disestablish(pc, sc->sc_intrhand);
2001 	sc->sc_intrhand = 0;
2002 
2003 	em_stop(sc, 1);
2004 
2005 	em_free_pci_resources(sc);
2006 
2007 	ether_ifdetach(ifp);
2008 	if_detach(ifp);
2009 
2010 	return (0);
2011 }
2012 
2013 int
2014 em_activate(struct device *self, int act)
2015 {
2016 	struct em_softc *sc = (struct em_softc *)self;
2017 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2018 	int rv = 0;
2019 
2020 	switch (act) {
2021 	case DVACT_SUSPEND:
2022 		if (ifp->if_flags & IFF_RUNNING)
2023 			em_stop(sc, 0);
2024 		/* We have no children at the moment, but we will soon. */
2025 		rv = config_activate_children(self, act);
2026 		break;
2027 	case DVACT_RESUME:
2028 		if (ifp->if_flags & IFF_UP)
2029 			em_init(sc);
2030 		break;
2031 	default:
2032 		rv = config_activate_children(self, act);
2033 		break;
2034 	}
2035 	return (rv);
2036 }
2037 
2038 /*********************************************************************
2039  *
2040  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2041  *
2042  **********************************************************************/
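/*
 * Sketch of what the code below does: when gigabit autonegotiation keeps
 * reporting master/slave configuration faults, temporarily clear the manual
 * master/slave enable bit and restart autonegotiation; if there is still no
 * link after EM_SMARTSPEED_DOWNSHIFT passes, re-enable the bit, and restart
 * the whole cycle after EM_SMARTSPEED_MAX passes.
 */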
2043 void
2044 em_smartspeed(struct em_softc *sc)
2045 {
2046 	uint16_t phy_tmp;
2047 
2048 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2049 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2050 		return;
2051 
2052 	if (sc->smartspeed == 0) {
2053 		/* If the Master/Slave config fault is asserted twice,
2054 		 * we assume the faults are back-to-back. */
2055 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2056 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2057 			return;
2058 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2059 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2060 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2061 					&phy_tmp);
2062 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2063 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2064 				em_write_phy_reg(&sc->hw,
2065 						    PHY_1000T_CTRL, phy_tmp);
2066 				sc->smartspeed++;
2067 				if (sc->hw.autoneg &&
2068 				    !em_phy_setup_autoneg(&sc->hw) &&
2069 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2070 						       &phy_tmp)) {
2071 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2072 						    MII_CR_RESTART_AUTO_NEG);
2073 					em_write_phy_reg(&sc->hw,
2074 							 PHY_CTRL, phy_tmp);
2075 				}
2076 			}
2077 		}
2078 		return;
2079 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2080 		/* If still no link, perhaps using 2/3 pair cable */
2081 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2082 		phy_tmp |= CR_1000T_MS_ENABLE;
2083 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2084 		if (sc->hw.autoneg &&
2085 		    !em_phy_setup_autoneg(&sc->hw) &&
2086 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2087 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2088 				    MII_CR_RESTART_AUTO_NEG);
2089 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2090 		}
2091 	}
2092 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2093 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2094 		sc->smartspeed = 0;
2095 }
2096 
2097 /*
2098  * Manage DMA'able memory.
2099  */
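/*
 * em_dma_malloc() below follows the usual bus_dma(9) sequence: create a DMA
 * map, allocate DMA-safe memory, map it into kernel virtual address space,
 * then load it into the map; the labelled error path unwinds those steps in
 * reverse.  em_dma_free() is its exact mirror.
 */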
2100 int
2101 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2102 {
2103 	int r;
2104 
2105 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2106 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2107 	if (r != 0)
2108 		return (r);
2109 
2110 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2111 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2112 	if (r != 0)
2113 		goto destroy;
2114 
2115 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2116 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2117 	if (r != 0)
2118 		goto free;
2119 
2120 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2121 	    NULL, BUS_DMA_WAITOK);
2122 	if (r != 0)
2123 		goto unmap;
2124 
2125 	dma->dma_size = size;
2126 	return (0);
2127 
2128 unmap:
2129 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2130 free:
2131 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2132 destroy:
2133 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2134 
2135 	return (r);
2136 }
2137 
2138 void
2139 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2140 {
2141 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2142 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2143 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2144 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2145 }
2146 
2147 /*********************************************************************
2148  *
2149  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2150  *  the information needed to transmit a packet on the wire.
2151  *
2152  **********************************************************************/
2153 int
2154 em_allocate_transmit_structures(struct em_softc *sc)
2155 {
2156 	struct em_queue *que;
2157 
2158 	FOREACH_QUEUE(sc, que) {
2159 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2160 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2161 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2162 
2163 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2164 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2165 		if (que->tx.sc_tx_pkts_ring == NULL) {
2166 			printf("%s: Unable to allocate tx_buffer memory\n",
2167 			    DEVNAME(sc));
2168 			return (ENOMEM);
2169 		}
2170 	}
2171 
2172 	return (0);
2173 }
2174 
2175 /*********************************************************************
2176  *
2177  *  Allocate and initialize transmit structures.
2178  *
2179  **********************************************************************/
2180 int
2181 em_setup_transmit_structures(struct em_softc *sc)
2182 {
2183 	struct em_queue *que;
2184 	struct em_packet *pkt;
2185 	int error, i;
2186 
2187 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2188 		goto fail;
2189 
2190 	FOREACH_QUEUE(sc, que) {
2191 		bzero((void *) que->tx.sc_tx_desc_ring,
2192 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2193 
2194 		for (i = 0; i < sc->sc_tx_slots; i++) {
2195 			pkt = &que->tx.sc_tx_pkts_ring[i];
2196 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2197 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2198 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2199 			if (error != 0) {
2200 				printf("%s: Unable to create TX DMA map\n",
2201 				    DEVNAME(sc));
2202 				goto fail;
2203 			}
2204 		}
2205 
2206 		que->tx.sc_tx_desc_head = 0;
2207 		que->tx.sc_tx_desc_tail = 0;
2208 
2209 		/* Set checksum context */
2210 		que->tx.active_checksum_context = OFFLOAD_NONE;
2211 	}
2212 
2213 	return (0);
2214 
2215 fail:
2216 	em_free_transmit_structures(sc);
2217 	return (error);
2218 }
2219 
2220 /*********************************************************************
2221  *
2222  *  Enable transmit unit.
2223  *
2224  **********************************************************************/
2225 void
2226 em_initialize_transmit_unit(struct em_softc *sc)
2227 {
2228 	u_int32_t	reg_tctl, reg_tipg = 0;
2229 	u_int64_t	bus_addr;
2230 	struct em_queue *que;
2231 
2232 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2233 
2234 	FOREACH_QUEUE(sc, que) {
2235 		/* Setup the Base and Length of the Tx Descriptor Ring */
2236 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2237 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2238 		    sc->sc_tx_slots *
2239 		    sizeof(struct em_tx_desc));
2240 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2241 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2242 
2243 		/* Setup the HW Tx Head and Tail descriptor pointers */
2244 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2245 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2246 
2247 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2248 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2249 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2250 
2251 		/* Set the default values for the Tx Inter Packet Gap timer */
2252 		switch (sc->hw.mac_type) {
2253 		case em_82542_rev2_0:
2254 		case em_82542_rev2_1:
2255 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2256 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2257 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2258 			break;
2259 		case em_80003es2lan:
2260 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2261 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2262 			break;
2263 		default:
2264 			if (sc->hw.media_type == em_media_type_fiber ||
2265 			    sc->hw.media_type == em_media_type_internal_serdes)
2266 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2267 			else
2268 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2269 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2270 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2271 		}
2272 
2273 
2274 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2275 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2276 		if (sc->hw.mac_type >= em_82540)
2277 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2278 
2279 		/* Setup Transmit Descriptor Base Settings */
2280 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2281 
2282 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2283 		    sc->hw.mac_type == em_82576 ||
2284 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2285 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2286 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2287 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2288 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2289 		} else if (sc->tx_int_delay > 0)
2290 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2291 	}
2292 
2293 	/* Program the Transmit Control Register */
2294 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2295 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2296 	if (sc->hw.mac_type >= em_82571)
2297 		reg_tctl |= E1000_TCTL_MULR;
2298 	if (sc->link_duplex == FULL_DUPLEX)
2299 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2300 	else
2301 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2302 	/* This write will effectively turn on the transmit unit */
2303 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2304 
2305 	/* SPT Si errata workaround to avoid data corruption */
2306 
2307 	if (sc->hw.mac_type == em_pch_spt) {
2308 		uint32_t	reg_val;
2309 
2310 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2311 		reg_val |= E1000_RCTL_RDMTS_HEX;
2312 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2313 
2314 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2315 		/* i218-i219 Specification Update 1.5.4.5 */
2316 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2317 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2318 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2319 	}
2320 }
2321 
2322 /*********************************************************************
2323  *
2324  *  Free all transmit related data structures.
2325  *
2326  **********************************************************************/
2327 void
2328 em_free_transmit_structures(struct em_softc *sc)
2329 {
2330 	struct em_queue *que;
2331 	struct em_packet *pkt;
2332 	int i;
2333 
2334 	INIT_DEBUGOUT("free_transmit_structures: begin");
2335 
2336 	FOREACH_QUEUE(sc, que) {
2337 		if (que->tx.sc_tx_pkts_ring != NULL) {
2338 			for (i = 0; i < sc->sc_tx_slots; i++) {
2339 				pkt = &que->tx.sc_tx_pkts_ring[i];
2340 
2341 				if (pkt->pkt_m != NULL) {
2342 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2343 					    0, pkt->pkt_map->dm_mapsize,
2344 					    BUS_DMASYNC_POSTWRITE);
2345 					bus_dmamap_unload(sc->sc_dmat,
2346 					    pkt->pkt_map);
2347 
2348 					m_freem(pkt->pkt_m);
2349 					pkt->pkt_m = NULL;
2350 				}
2351 
2352 				if (pkt->pkt_map != NULL) {
2353 					bus_dmamap_destroy(sc->sc_dmat,
2354 					    pkt->pkt_map);
2355 					pkt->pkt_map = NULL;
2356 				}
2357 			}
2358 
2359 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2360 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2361 			que->tx.sc_tx_pkts_ring = NULL;
2362 		}
2363 
2364 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2365 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2366 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2367 	}
2368 }
2369 
2370 /*********************************************************************
2371  *
2372  *  The offload context needs to be set when we transfer the first
2373  *  packet of a particular protocol (TCP/UDP). We change the
2374  *  context only if the protocol type changes.
2375  *
2376  **********************************************************************/
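/*
 * Worked example for the offsets programmed below, assuming an untagged
 * frame with a 14-byte Ethernet header and a 20-byte IPv4 header without
 * options: ipcss = 14, ipcso = 14 + 10 = 24, ipcse = 14 + 20 - 1 = 33,
 * tucss = 34, and tucso = 34 + 16 = 50 for TCP or 34 + 6 = 40 for UDP.
 */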
2377 u_int
2378 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2379     u_int32_t *txd_upper, u_int32_t *txd_lower)
2380 {
2381 	struct em_context_desc *TXD;
2382 
2383 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2384 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2385 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2386 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2387 			return (0);
2388 		else
2389 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2390 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2391 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2392 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2393 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2394 			return (0);
2395 		else
2396 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2397 	} else {
2398 		*txd_upper = 0;
2399 		*txd_lower = 0;
2400 		return (0);
2401 	}
2402 
2403 	/* If we reach this point, the checksum offload context
2404 	 * needs to be reset.
2405 	 */
2406 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2407 
2408 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2409 	TXD->lower_setup.ip_fields.ipcso =
2410 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2411 	TXD->lower_setup.ip_fields.ipcse =
2412 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2413 
2414 	TXD->upper_setup.tcp_fields.tucss =
2415 	    ETHER_HDR_LEN + sizeof(struct ip);
2416 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2417 
2418 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2419 		TXD->upper_setup.tcp_fields.tucso =
2420 		    ETHER_HDR_LEN + sizeof(struct ip) +
2421 		    offsetof(struct tcphdr, th_sum);
2422 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2423 		TXD->upper_setup.tcp_fields.tucso =
2424 		    ETHER_HDR_LEN + sizeof(struct ip) +
2425 		    offsetof(struct udphdr, uh_sum);
2426 	}
2427 
2428 	TXD->tcp_seg_setup.data = htole32(0);
2429 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2430 
2431 	return (1);
2432 }
2433 
2434 /**********************************************************************
2435  *
2436  *  Examine each tx_buffer in the used queue. If the hardware is done
2437  *  processing the packet then free associated resources. The
2438  *  tx_buffer is put back on the free queue.
2439  *
2440  **********************************************************************/
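/*
 * Ring bookkeeping used below: sc_tx_desc_tail chases sc_tx_desc_head, and a
 * packet's slots are reclaimed only once the descriptor at its end-of-packet
 * index (pkt_eop) has the DD (descriptor done) status bit set, so a packet
 * that spans several descriptors is freed as a unit.
 */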
2441 void
2442 em_txeof(struct em_queue *que)
2443 {
2444 	struct em_softc *sc = que->sc;
2445 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2446 	struct em_packet *pkt;
2447 	struct em_tx_desc *desc;
2448 	u_int head, tail;
2449 	u_int free = 0;
2450 
2451 	head = que->tx.sc_tx_desc_head;
2452 	tail = que->tx.sc_tx_desc_tail;
2453 
2454 	if (head == tail)
2455 		return;
2456 
2457 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2458 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2459 	    BUS_DMASYNC_POSTREAD);
2460 
2461 	do {
2462 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2463 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2464 
2465 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2466 			break;
2467 
2468 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2469 		    0, pkt->pkt_map->dm_mapsize,
2470 		    BUS_DMASYNC_POSTWRITE);
2471 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2472 
2473 		KASSERT(pkt->pkt_m != NULL);
2474 
2475 		m_freem(pkt->pkt_m);
2476 		pkt->pkt_m = NULL;
2477 
2478 		tail = pkt->pkt_eop;
2479 
2480 		if (++tail == sc->sc_tx_slots)
2481 			tail = 0;
2482 
2483 		free++;
2484 	} while (tail != head);
2485 
2486 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2487 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2488 	    BUS_DMASYNC_PREREAD);
2489 
2490 	if (free == 0)
2491 		return;
2492 
2493 	que->tx.sc_tx_desc_tail = tail;
2494 
2495 	if (ifq_is_oactive(&ifp->if_snd))
2496 		ifq_restart(&ifp->if_snd);
2497 	else if (tail == head)
2498 		ifp->if_timer = 0;
2499 }
2500 
2501 /*********************************************************************
2502  *
2503  *  Get a buffer from system mbuf buffer pool.
2504  *
2505  **********************************************************************/
2506 int
2507 em_get_buf(struct em_queue *que, int i)
2508 {
2509 	struct em_softc *sc = que->sc;
2510 	struct mbuf    *m;
2511 	struct em_packet *pkt;
2512 	struct em_rx_desc *desc;
2513 	int error;
2514 
2515 	pkt = &que->rx.sc_rx_pkts_ring[i];
2516 	desc = &que->rx.sc_rx_desc_ring[i];
2517 
2518 	KASSERT(pkt->pkt_m == NULL);
2519 
2520 	m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES);
2521 	if (m == NULL) {
2522 		sc->mbuf_cluster_failed++;
2523 		return (ENOBUFS);
2524 	}
2525 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
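	/*
	 * Trim ETHER_ALIGN (2) bytes from the front of the cluster so that
	 * the IP header following the 14-byte Ethernet header ends up on a
	 * 4-byte boundary.
	 */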
2526 	m_adj(m, ETHER_ALIGN);
2527 
2528 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2529 	    m, BUS_DMA_NOWAIT);
2530 	if (error) {
2531 		m_freem(m);
2532 		return (error);
2533 	}
2534 
2535 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2536 	    0, pkt->pkt_map->dm_mapsize,
2537 	    BUS_DMASYNC_PREREAD);
2538 	pkt->pkt_m = m;
2539 
2540 	memset(desc, 0, sizeof(*desc));
2541 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2542 
2543 	return (0);
2544 }
2545 
2546 /*********************************************************************
2547  *
2548  *  Allocate memory for rx_buffer structures. Since we use one
2549  *  rx_buffer per received packet, the maximum number of rx_buffer's
2550  *  rx_buffer per received packet, the maximum number of rx_buffers
2551  *  that we've allocated.
2552  *
2553  **********************************************************************/
2554 int
2555 em_allocate_receive_structures(struct em_softc *sc)
2556 {
2557 	struct em_queue *que;
2558 	struct em_packet *pkt;
2559 	int i;
2560 	int error;
2561 
2562 	FOREACH_QUEUE(sc, que) {
2563 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2564 		    sizeof(*que->rx.sc_rx_pkts_ring),
2565 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2566 		if (que->rx.sc_rx_pkts_ring == NULL) {
2567 			printf("%s: Unable to allocate rx_buffer memory\n",
2568 			    DEVNAME(sc));
2569 			return (ENOMEM);
2570 		}
2571 
2572 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2573 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2574 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2575 
2576 		for (i = 0; i < sc->sc_rx_slots; i++) {
2577 			pkt = &que->rx.sc_rx_pkts_ring[i];
2578 
2579 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2580 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2581 			if (error != 0) {
2582 				printf("%s: em_allocate_receive_structures: "
2583 				    "bus_dmamap_create failed; error %u\n",
2584 				    DEVNAME(sc), error);
2585 				goto fail;
2586 			}
2587 
2588 			pkt->pkt_m = NULL;
2589 		}
2590 	}
2591 
2592 	return (0);
2593 
2594 fail:
2595 	em_free_receive_structures(sc);
2596 	return (error);
2597 }
2598 
2599 /*********************************************************************
2600  *
2601  *  Allocate and initialize receive structures.
2602  *
2603  **********************************************************************/
2604 int
2605 em_setup_receive_structures(struct em_softc *sc)
2606 {
2607 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2608 	struct em_queue *que;
2609 	u_int lwm;
2610 
2611 	if (em_allocate_receive_structures(sc))
2612 		return (ENOMEM);
2613 
2614 	FOREACH_QUEUE(sc, que) {
2615 		memset(que->rx.sc_rx_desc_ring, 0,
2616 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2617 
2618 		/* Setup our descriptor pointers */
2619 		que->rx.sc_rx_desc_tail = 0;
2620 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2621 
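		/*
		 * Low water mark for the rx ring: keep enough clusters on
		 * hand for two maximum-sized frames, but never fewer than 4.
		 * With a standard 1500-byte MTU and 2048-byte clusters this
		 * evaluates to max(4, 2 * (0 + 1)) = 4.
		 */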
2622 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2623 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2624 
2625 		if (em_rxfill(que) == 0) {
2626 			printf("%s: unable to fill any rx descriptors\n",
2627 			    DEVNAME(sc));
2628 			return (ENOMEM);
2629 		}
2630 	}
2631 
2632 	return (0);
2633 }
2634 
2635 /*********************************************************************
2636  *
2637  *  Enable receive unit.
2638  *
2639  **********************************************************************/
2640 void
2641 em_initialize_receive_unit(struct em_softc *sc)
2642 {
2643 	struct em_queue *que;
2644 	u_int32_t	reg_rctl;
2645 	u_int32_t	reg_rxcsum;
2646 	u_int32_t	reg_srrctl;
2647 	u_int64_t	bus_addr;
2648 
2649 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2650 
2651 	/* Make sure receives are disabled while setting up the descriptor ring */
2652 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2653 
2654 	/* Set the Receive Delay Timer Register */
2655 	E1000_WRITE_REG(&sc->hw, RDTR,
2656 			sc->rx_int_delay | E1000_RDT_FPDB);
2657 
2658 	if (sc->hw.mac_type >= em_82540) {
2659 		if (sc->rx_int_delay)
2660 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2661 
2662 		/* Set the interrupt throttling rate.  Value is calculated
2663 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
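		/* As an example, assuming MAX_INTS_PER_SEC is 8000, this
		 * works out to about 488 in 256 ns units, i.e. an interrupt
		 * interval of roughly 125 us. */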
2664 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2665 	}
2666 
2667 	/* Setup the Receive Control Register */
2668 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2669 	    E1000_RCTL_RDMTS_HALF |
2670 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2671 
2672 	if (sc->hw.tbi_compatibility_on == TRUE)
2673 		reg_rctl |= E1000_RCTL_SBP;
2674 
2675 	/*
2676 	 * The i350 has a bug where it always strips the CRC whether
2677 	 * asked to or not.  So ask for stripped CRC here and
2678 	 * cope with it in rxeof.
2679 	 */
2680 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2681 		reg_rctl |= E1000_RCTL_SECRC;
2682 
2683 	switch (sc->sc_rx_buffer_len) {
2684 	default:
2685 	case EM_RXBUFFER_2048:
2686 		reg_rctl |= E1000_RCTL_SZ_2048;
2687 		break;
2688 	case EM_RXBUFFER_4096:
2689 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2690 		break;
2691 	case EM_RXBUFFER_8192:
2692 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2693 		break;
2694 	case EM_RXBUFFER_16384:
2695 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2696 		break;
2697 	}
2698 
2699 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2700 		reg_rctl |= E1000_RCTL_LPE;
2701 
2702 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2703 	if (sc->hw.mac_type >= em_82543) {
2704 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2705 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2706 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2707 	}
2708 
2709 	/*
2710 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573
2711 	 * long latencies are observed, like Lenovo X60.
2712 	 */
2713 	if (sc->hw.mac_type == em_82573)
2714 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2715 
2716 	FOREACH_QUEUE(sc, que) {
2717 		if (sc->num_queues > 1) {
2718 			/*
2719 			 * Disable Drop Enable for every queue; by default it
2720 			 * is enabled for queues > 0.
2721 			 */
2722 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2723 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2724 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2725 		}
2726 
2727 		/* Setup the Base and Length of the Rx Descriptor Ring */
2728 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2729 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2730 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2731 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2732 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2733 
2734 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2735 		    sc->hw.mac_type == em_82576 ||
2736 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2737 			/* 82575/6 need to enable the RX queue */
2738 			uint32_t reg;
2739 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2740 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2741 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2742 		}
2743 	}
2744 
2745 	/* Enable Receives */
2746 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2747 
2748 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2749 	FOREACH_QUEUE(sc, que) {
2750 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2751 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2752 	}
2753 }
2754 
2755 /*********************************************************************
2756  *
2757  *  Free receive related data structures.
2758  *
2759  **********************************************************************/
2760 void
2761 em_free_receive_structures(struct em_softc *sc)
2762 {
2763 	struct em_queue *que;
2764 	struct em_packet *pkt;
2765 	int i;
2766 
2767 	INIT_DEBUGOUT("free_receive_structures: begin");
2768 
2769 	FOREACH_QUEUE(sc, que) {
2770 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2771 
2772 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2773 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2774 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2775 
2776 		if (que->rx.sc_rx_pkts_ring != NULL) {
2777 			for (i = 0; i < sc->sc_rx_slots; i++) {
2778 				pkt = &que->rx.sc_rx_pkts_ring[i];
2779 				if (pkt->pkt_m != NULL) {
2780 					bus_dmamap_sync(sc->sc_dmat,
2781 					    pkt->pkt_map,
2782 					    0, pkt->pkt_map->dm_mapsize,
2783 					    BUS_DMASYNC_POSTREAD);
2784 					bus_dmamap_unload(sc->sc_dmat,
2785 					    pkt->pkt_map);
2786 					m_freem(pkt->pkt_m);
2787 					pkt->pkt_m = NULL;
2788 				}
2789 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2790 			}
2791 
2792 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2793 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2794 			que->rx.sc_rx_pkts_ring = NULL;
2795 		}
2796 
2797 		if (que->rx.fmp != NULL) {
2798 			m_freem(que->rx.fmp);
2799 			que->rx.fmp = NULL;
2800 			que->rx.lmp = NULL;
2801 		}
2802 	}
2803 }
2804 
2805 int
2806 em_rxfill(struct em_queue *que)
2807 {
2808 	struct em_softc *sc = que->sc;
2809 	u_int slots;
2810 	int post = 0;
2811 	int i;
2812 
2813 	i = que->rx.sc_rx_desc_head;
2814 
2815 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2816 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2817 	    BUS_DMASYNC_POSTWRITE);
2818 
2819 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2820 	    slots > 0; slots--) {
2821 		if (++i == sc->sc_rx_slots)
2822 			i = 0;
2823 
2824 		if (em_get_buf(que, i) != 0)
2825 			break;
2826 
2827 		que->rx.sc_rx_desc_head = i;
2828 		post = 1;
2829 	}
2830 
2831 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2832 
2833 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2834 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2835 	    BUS_DMASYNC_PREWRITE);
2836 
2837 	return (post);
2838 }
2839 
2840 void
2841 em_rxrefill(void *arg)
2842 {
2843 	struct em_queue *que = arg;
2844 	struct em_softc *sc = que->sc;
2845 
2846 	if (em_rxfill(que))
2847 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2848 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2849 		timeout_add(&que->rx_refill, 1);
2850 }
2851 
2852 /*********************************************************************
2853  *
2854  *  This routine executes in interrupt context. It replenishes
2855  *  the mbufs in the descriptor ring and sends data which has been
2856  *  DMA'ed into host memory up to the upper layer.
2857  *
2858  *********************************************************************/
2859 int
2860 em_rxeof(struct em_queue *que)
2861 {
2862 	struct em_softc	    *sc = que->sc;
2863 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2864 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2865 	struct mbuf	    *m;
2866 	u_int8_t	    accept_frame = 0;
2867 	u_int8_t	    eop = 0;
2868 	u_int16_t	    len, desc_len, prev_len_adj;
2869 	int		    i, rv = 0;
2870 
2871 	/* Pointer to the receive descriptor being examined. */
2872 	struct em_rx_desc   *desc;
2873 	struct em_packet    *pkt;
2874 	u_int8_t	    status;
2875 
2876 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2877 		return (0);
2878 
2879 	i = que->rx.sc_rx_desc_tail;
2880 
2881 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2882 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2883 	    BUS_DMASYNC_POSTREAD);
2884 
2885 	do {
2886 		m = NULL;
2887 
2888 		pkt = &que->rx.sc_rx_pkts_ring[i];
2889 		desc = &que->rx.sc_rx_desc_ring[i];
2890 
2891 		status = desc->status;
2892 		if (!ISSET(status, E1000_RXD_STAT_DD))
2893 			break;
2894 
2895 		/* pull the mbuf off the ring */
2896 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2897 		    0, pkt->pkt_map->dm_mapsize,
2898 		    BUS_DMASYNC_POSTREAD);
2899 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2900 		m = pkt->pkt_m;
2901 		pkt->pkt_m = NULL;
2902 
2903 		KASSERT(m != NULL);
2904 
2905 		if_rxr_put(&que->rx.sc_rx_ring, 1);
2906 		rv = 1;
2907 
2908 		accept_frame = 1;
2909 		prev_len_adj = 0;
2910 		desc_len = letoh16(desc->length);
2911 
2912 		if (status & E1000_RXD_STAT_EOP) {
2913 			eop = 1;
2914 			if (desc_len < ETHER_CRC_LEN) {
2915 				len = 0;
2916 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2917 			} else if (sc->hw.mac_type == em_i210 ||
2918 			    sc->hw.mac_type == em_i350)
2919 				len = desc_len;
2920 			else
2921 				len = desc_len - ETHER_CRC_LEN;
2922 		} else {
2923 			eop = 0;
2924 			len = desc_len;
2925 		}
2926 
2927 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2928 			u_int8_t last_byte;
2929 			u_int32_t pkt_len = desc_len;
2930 
2931 			if (que->rx.fmp != NULL)
2932 				pkt_len += que->rx.fmp->m_pkthdr.len;
2933 
2934 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
2935 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
2936 			    pkt_len, last_byte)) {
2937 #ifndef SMALL_KERNEL
2938 				em_tbi_adjust_stats(&sc->hw, &sc->stats,
2939 				    pkt_len, sc->hw.mac_addr);
2940 #endif
2941 				if (len > 0)
2942 					len--;
2943 			} else
2944 				accept_frame = 0;
2945 		}
2946 
2947 		if (accept_frame) {
2948 			/* Assign correct length to the current fragment */
2949 			m->m_len = len;
2950 
2951 			if (que->rx.fmp == NULL) {
2952 				m->m_pkthdr.len = m->m_len;
2953 				que->rx.fmp = m;	 /* Store the first mbuf */
2954 				que->rx.lmp = m;
2955 			} else {
2956 				/* Chain mbuf's together */
2957 				m->m_flags &= ~M_PKTHDR;
2958 				/*
2959 				 * Adjust length of previous mbuf in chain if
2960 				 * we received less than 4 bytes in the last
2961 				 * descriptor.
2962 				 */
2963 				if (prev_len_adj > 0) {
2964 					que->rx.lmp->m_len -= prev_len_adj;
2965 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
2966 				}
2967 				que->rx.lmp->m_next = m;
2968 				que->rx.lmp = m;
2969 				que->rx.fmp->m_pkthdr.len += m->m_len;
2970 			}
2971 
2972 			if (eop) {
2973 				m = que->rx.fmp;
2974 
2975 				em_receive_checksum(sc, desc, m);
2976 #if NVLAN > 0
2977 				if (desc->status & E1000_RXD_STAT_VP) {
2978 					m->m_pkthdr.ether_vtag =
2979 					    letoh16(desc->special);
2980 					m->m_flags |= M_VLANTAG;
2981 				}
2982 #endif
2983 				ml_enqueue(&ml, m);
2984 
2985 				que->rx.fmp = NULL;
2986 				que->rx.lmp = NULL;
2987 			}
2988 		} else {
2989 			que->rx.dropped_pkts++;
2990 
2991 			if (que->rx.fmp != NULL) {
2992 				m_freem(que->rx.fmp);
2993 				que->rx.fmp = NULL;
2994 				que->rx.lmp = NULL;
2995 			}
2996 
2997 			m_freem(m);
2998 		}
2999 
3000 		/* Advance our pointers to the next descriptor. */
3001 		if (++i == sc->sc_rx_slots)
3002 			i = 0;
3003 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3004 
3005 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3006 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3007 	    BUS_DMASYNC_PREREAD);
3008 
3009 	que->rx.sc_rx_desc_tail = i;
3010 
3011 	if (ifiq_input(&ifp->if_rcv, &ml))
3012 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3013 
3014 	return (rv);
3015 }
3016 
3017 /*********************************************************************
3018  *
3019  *  Verify that the hardware indicated that the checksum is valid.
3020  *  Inform the stack about the status of the checksum so that the stack
3021  *  doesn't spend time verifying the checksum.
3022  *
3023  *********************************************************************/
3024 void
3025 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3026     struct mbuf *mp)
3027 {
3028 	/* 82543 or newer only */
3029 	if ((sc->hw.mac_type < em_82543) ||
3030 	    /* Ignore Checksum bit is set */
3031 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3032 		mp->m_pkthdr.csum_flags = 0;
3033 		return;
3034 	}
3035 
3036 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3037 		/* Did it pass? */
3038 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3039 			/* IP Checksum Good */
3040 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3041 
3042 		} else
3043 			mp->m_pkthdr.csum_flags = 0;
3044 	}
3045 
3046 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3047 		/* Did it pass? */
3048 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3049 			mp->m_pkthdr.csum_flags |=
3050 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3051 	}
3052 }
3053 
3054 /*
3055  * This turns on the hardware offload of the VLAN
3056  * tag insertion and stripping.
3057  */
3058 void
3059 em_enable_hw_vlans(struct em_softc *sc)
3060 {
3061 	uint32_t ctrl;
3062 
3063 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3064 	ctrl |= E1000_CTRL_VME;
3065 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3066 }
3067 
3068 void
3069 em_enable_intr(struct em_softc *sc)
3070 {
3071 	uint32_t mask;
3072 
3073 	if (sc->msix) {
3074 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3075 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3076 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3077 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3078 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3079 	} else
3080 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3081 }
3082 
3083 void
3084 em_disable_intr(struct em_softc *sc)
3085 {
3086 	/*
3087 	 * The first version of the 82542 had an erratum where, when link
3088 	 * was forced, it would stay up even if the cable was disconnected.
3089 	 * Sequence errors were used to detect the disconnect and then
3090 	 * the driver would unforce the link.  This code is in the ISR.
3091 	 * For this to work correctly the Sequence error interrupt had
3092 	 * to be enabled all the time.
3093 	 */
3094 	if (sc->msix) {
3095 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3096 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3097 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3098 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3099 	else
3100 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3101 }
3102 
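/*
 * PCI configuration space is accessed in aligned 32-bit dwords, so the two
 * helpers below emulate 16-bit accesses by operating on the containing dword
 * and using bit 1 of the register offset (reg & 0x2) to select the upper or
 * lower halfword.
 */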
3103 void
3104 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3105 {
3106 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3107 	pcireg_t val;
3108 
3109 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3110 	if (reg & 0x2) {
3111 		val &= 0x0000ffff;
3112 		val |= (*value << 16);
3113 	} else {
3114 		val &= 0xffff0000;
3115 		val |= *value;
3116 	}
3117 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3118 }
3119 
3120 void
3121 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3122 {
3123 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3124 	pcireg_t val;
3125 
3126 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3127 	if (reg & 0x2)
3128 		*value = (val >> 16) & 0xffff;
3129 	else
3130 		*value = val & 0xffff;
3131 }
3132 
3133 void
3134 em_pci_set_mwi(struct em_hw *hw)
3135 {
3136 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3137 
3138 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3139 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3140 }
3141 
3142 void
3143 em_pci_clear_mwi(struct em_hw *hw)
3144 {
3145 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3146 
3147 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3148 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3149 }
3150 
3151 /*
3152  * We may eventually really do this, but it's unnecessary
3153  * for now, so we just return unsupported.
3154  */
3155 int32_t
3156 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3157 {
3158 	return -E1000_NOT_IMPLEMENTED;
3159 }
3160 
3161 /*********************************************************************
3162 * 82544 Coexistence issue workaround.
3163 *    There are 2 issues.
3164 *       1. Transmit Hang issue.
3165 *    To detect this issue, the following equation can be used...
3166 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3167 *          If SUM[3:0] is between 1 and 4, we will have this issue.
3168 *
3169 *       2. DAC issue.
3170 *    To detect this issue, the following equation can be used...
3171 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3172 *          If SUM[3:0] is between 9 and c, we will have this issue.
3173 *
3174 *
3175 *    WORKAROUND:
3176 *          Make sure the ending address is not 1,2,3,4 (Hang) or 9,a,b,c (DAC).
3177 *
3178 *** *********************************************************************/
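/*
 * Worked example with made-up values: a buffer whose address ends in 0x6 and
 * whose length is 0xE gives SUM[3:0] = ((0x6 & 0x7) + (0xE & 0xF)) & 0xF =
 * 0x4, which falls in the 1..4 hang range, so the function below splits it
 * into one descriptor of length - 4 bytes plus a trailing 4-byte descriptor.
 */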
3179 u_int32_t
3180 em_fill_descriptors(u_int64_t address, u_int32_t length,
3181     PDESC_ARRAY desc_array)
3182 {
3183         /* Since the issue is sensitive to both length and address, */
3184         /* let us first check the address... */
3185         u_int32_t safe_terminator;
3186         if (length <= 4) {
3187                 desc_array->descriptor[0].address = address;
3188                 desc_array->descriptor[0].length = length;
3189                 desc_array->elements = 1;
3190                 return desc_array->elements;
3191         }
3192         safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3193         /* If it does not fall between 0x1 and 0x4 or between 0x9 and 0xC then return. */
3194         if (safe_terminator == 0   ||
3195         (safe_terminator > 4   &&
3196         safe_terminator < 9)   ||
3197         (safe_terminator > 0xC &&
3198         safe_terminator <= 0xF)) {
3199                 desc_array->descriptor[0].address = address;
3200                 desc_array->descriptor[0].length = length;
3201                 desc_array->elements = 1;
3202                 return desc_array->elements;
3203         }
3204 
3205         desc_array->descriptor[0].address = address;
3206         desc_array->descriptor[0].length = length - 4;
3207         desc_array->descriptor[1].address = address + (length - 4);
3208         desc_array->descriptor[1].length = 4;
3209         desc_array->elements = 2;
3210         return desc_array->elements;
3211 }
3212 
3213 /*
3214  * Disable the L0S and L1 LINK states.
3215  */
3216 void
3217 em_disable_aspm(struct em_softc *sc)
3218 {
3219 	int offset;
3220 	pcireg_t val;
3221 
3222 	switch (sc->hw.mac_type) {
3223 		case em_82571:
3224 		case em_82572:
3225 		case em_82573:
3226 		case em_82574:
3227 			break;
3228 		default:
3229 			return;
3230 	}
3231 
3232 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3233 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3234 		return;
3235 
3236 	/* Disable PCIe Active State Power Management (ASPM). */
3237 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3238 	    offset + PCI_PCIE_LCSR);
3239 
3240 	switch (sc->hw.mac_type) {
3241 		case em_82571:
3242 		case em_82572:
3243 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3244 			break;
3245 		case em_82573:
3246 		case em_82574:
3247 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3248 			    PCI_PCIE_LCSR_ASPM_L1);
3249 			break;
3250 		default:
3251 			break;
3252 	}
3253 
3254 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3255 	    offset + PCI_PCIE_LCSR, val);
3256 }
3257 
3258 /*
3259  * em_flush_tx_ring - remove all descriptors from the tx_ring
3260  *
3261  * We want to clear all pending descriptors from the TX ring.
3262  * Zeroing happens when the HW reads the regs.  We assign the ring itself as
3263  * the data of the next descriptor.  We don't care about the data since we
3264  * are about to reset the HW.
3265  */
3266 void
3267 em_flush_tx_ring(struct em_queue *que)
3268 {
3269 	struct em_softc		*sc = que->sc;
3270 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3271 	uint16_t		 size = 512;
3272 	struct em_tx_desc	*txd;
3273 
3274 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3275 
3276 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3277 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3278 
3279 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3280 
3281 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3282 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3283 	txd->lower.data = htole32(txd_lower | size);
3284 	txd->upper.data = 0;
3285 
3286 	/* flush descriptors to memory before notifying the HW */
3287 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3288 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3289 
3290 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3291 		que->tx.sc_tx_desc_head = 0;
3292 
3293 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3294 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3295 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3296 	usec_delay(250);
3297 }
3298 
3299 /*
3300  * em_flush_rx_ring - remove all descriptors from the rx_ring
3301  *
3302  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3303  */
3304 void
3305 em_flush_rx_ring(struct em_queue *que)
3306 {
3307 	uint32_t	rctl, rxdctl;
3308 	struct em_softc	*sc = que->sc;
3309 
3310 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3311 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3312 	E1000_WRITE_FLUSH(&sc->hw);
3313 	usec_delay(150);
3314 
3315 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3316 	/* zero the lower 14 bits (prefetch and host thresholds) */
3317 	rxdctl &= 0xffffc000;
3318 	/*
3319 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3320 	 * and make sure the granularity is "descriptors" and not "cache lines"
3321 	 */
3322 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3323 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3324 
3325 	/* momentarily enable the RX ring for the changes to take effect */
3326 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3327 	E1000_WRITE_FLUSH(&sc->hw);
3328 	usec_delay(150);
3329 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3330 }
3331 
3332 /*
3333  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3334  *
3335  * On the i219, the descriptor rings must be emptied before resetting the HW
3336  * or before changing the device state to D3 during runtime (runtime PM).
3337  *
3338  * Failure to do this will cause the HW to enter a unit hang state which can
3339  * only be released by a PCI reset on the device.
3340  *
3341  */
3342 void
3343 em_flush_desc_rings(struct em_softc *sc)
3344 {
3345 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3346 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3347 	uint32_t		 fextnvm11, tdlen;
3348 	uint16_t		 hang_state;
3349 
3350 	/* First, disable MULR fix in FEXTNVM11 */
3351 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3352 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3353 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3354 
3355 	/* do nothing if we're not in faulty state, or if the queue is empty */
3356 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3357 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3358 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3359 		return;
3360 	em_flush_tx_ring(que);
3361 
3362 	/* recheck, maybe the fault is caused by the rx ring */
3363 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3364 	if (hang_state & FLUSH_DESC_REQUIRED)
3365 		em_flush_rx_ring(que);
3366 }
3367 
3368 int
3369 em_allocate_legacy(struct em_softc *sc)
3370 {
3371 	pci_intr_handle_t	 ih;
3372 	const char		*intrstr = NULL;
3373 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3374 	pci_chipset_tag_t	 pc = pa->pa_pc;
3375 
3376 	if (pci_intr_map_msi(pa, &ih)) {
3377 		if (pci_intr_map(pa, &ih)) {
3378 			printf(": couldn't map interrupt\n");
3379 			return (ENXIO);
3380 		}
3381 		sc->legacy_irq = 1;
3382 	}
3383 
3384 	intrstr = pci_intr_string(pc, ih);
3385 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3386 	    em_intr, sc, DEVNAME(sc));
3387 	if (sc->sc_intrhand == NULL) {
3388 		printf(": couldn't establish interrupt");
3389 		if (intrstr != NULL)
3390 			printf(" at %s", intrstr);
3391 		printf("\n");
3392 		return (ENXIO);
3393 	}
3394 	printf(": %s", intrstr);
3395 
3396 	return (0);
3397 }
3398 
3399 
3400 #ifndef SMALL_KERNEL
3401 /**********************************************************************
3402  *
3403  *  Update the board statistics counters.
3404  *
3405  **********************************************************************/
3406 void
3407 em_update_stats_counters(struct em_softc *sc)
3408 {
3409 	struct em_queue *que = sc->queues; /* Use only first queue. */
3410 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3411 	uint64_t	colc, rxerrc, crcerrs, algnerrc;
3412 	uint64_t	ruc, roc, mpc, cexterr;
3413 	uint64_t	ecol, latecol;
3414 
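	/*
	 * The hardware statistics registers are clear-on-read, so each value
	 * read here is a delta since the previous call and is accumulated
	 * into the software counters in sc->stats.
	 */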
3415 	crcerrs = E1000_READ_REG(&sc->hw, CRCERRS);
3416 	sc->stats.crcerrs += crcerrs;
3417 	mpc = E1000_READ_REG(&sc->hw, MPC);
3418 	sc->stats.mpc += mpc;
3419 	ecol = E1000_READ_REG(&sc->hw, ECOL);
3420 	sc->stats.ecol += ecol;
3421 
3422 	latecol = E1000_READ_REG(&sc->hw, LATECOL);
3423 	sc->stats.latecol += latecol;
3424 	colc = E1000_READ_REG(&sc->hw, COLC);
3425 	sc->stats.colc += colc;
3426 
3427 	ruc = E1000_READ_REG(&sc->hw, RUC);
3428 	sc->stats.ruc += ruc;
3429 	roc = E1000_READ_REG(&sc->hw, ROC);
3430 	sc->stats.roc += roc;
3431 
3432 	algnerrc = rxerrc = cexterr = 0;
3433 	if (sc->hw.mac_type >= em_82543) {
3434 		algnerrc = E1000_READ_REG(&sc->hw, ALGNERRC);
3435 		rxerrc = E1000_READ_REG(&sc->hw, RXERRC);
3436 		cexterr = E1000_READ_REG(&sc->hw, CEXTERR);
3437 	}
3438 	sc->stats.algnerrc += algnerrc;
3439 	sc->stats.rxerrc += rxerrc;
3440 	sc->stats.cexterr += cexterr;
3441 
3442 #ifdef EM_DEBUG
3443 	if (sc->hw.media_type == em_media_type_copper ||
3444 	    (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU)) {
3445 		sc->stats.symerrs += E1000_READ_REG(&sc->hw, SYMERRS);
3446 		sc->stats.sec += E1000_READ_REG(&sc->hw, SEC);
3447 	}
3448 	sc->stats.scc += E1000_READ_REG(&sc->hw, SCC);
3449 
3450 	sc->stats.mcc += E1000_READ_REG(&sc->hw, MCC);
3451 	sc->stats.dc += E1000_READ_REG(&sc->hw, DC);
3452 	sc->stats.rlec += E1000_READ_REG(&sc->hw, RLEC);
3453 	sc->stats.xonrxc += E1000_READ_REG(&sc->hw, XONRXC);
3454 	sc->stats.xontxc += E1000_READ_REG(&sc->hw, XONTXC);
3455 	sc->stats.xoffrxc += E1000_READ_REG(&sc->hw, XOFFRXC);
3456 	sc->stats.xofftxc += E1000_READ_REG(&sc->hw, XOFFTXC);
3457 	sc->stats.fcruc += E1000_READ_REG(&sc->hw, FCRUC);
3458 	sc->stats.prc64 += E1000_READ_REG(&sc->hw, PRC64);
3459 	sc->stats.prc127 += E1000_READ_REG(&sc->hw, PRC127);
3460 	sc->stats.prc255 += E1000_READ_REG(&sc->hw, PRC255);
3461 	sc->stats.prc511 += E1000_READ_REG(&sc->hw, PRC511);
3462 	sc->stats.prc1023 += E1000_READ_REG(&sc->hw, PRC1023);
3463 	sc->stats.prc1522 += E1000_READ_REG(&sc->hw, PRC1522);
3464 	sc->stats.gprc += E1000_READ_REG(&sc->hw, GPRC);
3465 	sc->stats.bprc += E1000_READ_REG(&sc->hw, BPRC);
3466 	sc->stats.mprc += E1000_READ_REG(&sc->hw, MPRC);
3467 	sc->stats.gptc += E1000_READ_REG(&sc->hw, GPTC);
3468 
3469 	/* For the 64-bit byte counters the low dword must be read first. */
3470 	/* Both registers clear on the read of the high dword */
3471 
3472 	sc->stats.gorcl += E1000_READ_REG(&sc->hw, GORCL);
3473 	sc->stats.gorch += E1000_READ_REG(&sc->hw, GORCH);
3474 	sc->stats.gotcl += E1000_READ_REG(&sc->hw, GOTCL);
3475 	sc->stats.gotch += E1000_READ_REG(&sc->hw, GOTCH);
3476 
3477 	sc->stats.rnbc += E1000_READ_REG(&sc->hw, RNBC);
3478 	sc->stats.rfc += E1000_READ_REG(&sc->hw, RFC);
3479 	sc->stats.rjc += E1000_READ_REG(&sc->hw, RJC);
3480 
3481 	sc->stats.torl += E1000_READ_REG(&sc->hw, TORL);
3482 	sc->stats.torh += E1000_READ_REG(&sc->hw, TORH);
3483 	sc->stats.totl += E1000_READ_REG(&sc->hw, TOTL);
3484 	sc->stats.toth += E1000_READ_REG(&sc->hw, TOTH);
3485 
3486 	sc->stats.tpr += E1000_READ_REG(&sc->hw, TPR);
3487 	sc->stats.tpt += E1000_READ_REG(&sc->hw, TPT);
3488 	sc->stats.ptc64 += E1000_READ_REG(&sc->hw, PTC64);
3489 	sc->stats.ptc127 += E1000_READ_REG(&sc->hw, PTC127);
3490 	sc->stats.ptc255 += E1000_READ_REG(&sc->hw, PTC255);
3491 	sc->stats.ptc511 += E1000_READ_REG(&sc->hw, PTC511);
3492 	sc->stats.ptc1023 += E1000_READ_REG(&sc->hw, PTC1023);
3493 	sc->stats.ptc1522 += E1000_READ_REG(&sc->hw, PTC1522);
3494 	sc->stats.mptc += E1000_READ_REG(&sc->hw, MPTC);
3495 	sc->stats.bptc += E1000_READ_REG(&sc->hw, BPTC);
3496 	sc->stats.sdpc += E1000_READ_REG(&sc->hw, SDPC);
3497 	sc->stats.mngpdc += E1000_READ_REG(&sc->hw, MGTPDC);
3498 	sc->stats.mngprc += E1000_READ_REG(&sc->hw, MGTPRC);
3499 	sc->stats.mngptc += E1000_READ_REG(&sc->hw, MGTPTC);
3500 	sc->stats.b2ospc += E1000_READ_REG(&sc->hw, B2OSPC);
3501 	sc->stats.o2bgptc += E1000_READ_REG(&sc->hw, O2BGPTC);
3502 	sc->stats.b2ogprc += E1000_READ_REG(&sc->hw, B2OGPRC);
3503 	sc->stats.o2bspc += E1000_READ_REG(&sc->hw, O2BSPC);
3504 	sc->stats.rpthc += E1000_READ_REG(&sc->hw, RPTHC);
3505 
3506 	if (sc->hw.mac_type >= em_82543) {
3507 		sc->stats.tncrs +=
3508 		    E1000_READ_REG(&sc->hw, TNCRS);
3509 		sc->stats.tsctc +=
3510 		    E1000_READ_REG(&sc->hw, TSCTC);
3511 		sc->stats.tsctfc +=
3512 		    E1000_READ_REG(&sc->hw, TSCTFC);
3513 	}
3514 #endif
3515 
3516 	/* Fill out the OS statistics structure */
3517 	ifp->if_collisions = colc;
3518 
3519 	/* Rx Errors */
3520 	ifp->if_ierrors =
3521 	    que->rx.dropped_pkts +
3522 	    rxerrc +
3523 	    crcerrs +
3524 	    algnerrc +
3525 	    ruc +
3526 	    roc +
3527 	    mpc +
3528 	    cexterr +
3529 	    sc->rx_overruns;
3530 
3531 	/* Tx Errors */
3532 	ifp->if_oerrors = ecol + latecol +
3533 	    sc->watchdog_events;
3534 }
3535 
3536 #ifdef EM_DEBUG
3537 /**********************************************************************
3538  *
3539  *  This routine is called only when IFF_DEBUG is enabled.
3540  *  This routine provides a way to take a look at important statistics
3541  *  maintained by the driver and hardware.
3542  *
3543  **********************************************************************/
3544 void
3545 em_print_hw_stats(struct em_softc *sc)
3546 {
3547 	const char * const unit = DEVNAME(sc);
3548 	struct em_queue *que;
3549 
3550 	printf("%s: Excessive collisions = %lld\n", unit,
3551 		(long long)sc->stats.ecol);
3552 	printf("%s: Symbol errors = %lld\n", unit,
3553 		(long long)sc->stats.symerrs);
3554 	printf("%s: Sequence errors = %lld\n", unit,
3555 		(long long)sc->stats.sec);
3556 	printf("%s: Defer count = %lld\n", unit,
3557 		(long long)sc->stats.dc);
3558 
3559 	printf("%s: Missed Packets = %lld\n", unit,
3560 		(long long)sc->stats.mpc);
3561 	printf("%s: Receive No Buffers = %lld\n", unit,
3562 		(long long)sc->stats.rnbc);
3563 	/* RLEC is inaccurate on some hardware, calculate our own */
3564 	printf("%s: Receive Length Errors = %lld\n", unit,
3565 		((long long)sc->stats.roc +
3566 		(long long)sc->stats.ruc));
3567 	printf("%s: Receive errors = %lld\n", unit,
3568 		(long long)sc->stats.rxerrc);
3569 	printf("%s: Crc errors = %lld\n", unit,
3570 		(long long)sc->stats.crcerrs);
3571 	printf("%s: Alignment errors = %lld\n", unit,
3572 		(long long)sc->stats.algnerrc);
3573 	printf("%s: Carrier extension errors = %lld\n", unit,
3574 		(long long)sc->stats.cexterr);
3575 
3576 	printf("%s: RX overruns = %ld\n", unit,
3577 		sc->rx_overruns);
3578 	printf("%s: watchdog timeouts = %ld\n", unit,
3579 		sc->watchdog_events);
3580 
3581 	printf("%s: XON Rcvd = %lld\n", unit,
3582 		(long long)sc->stats.xonrxc);
3583 	printf("%s: XON Xmtd = %lld\n", unit,
3584 		(long long)sc->stats.xontxc);
3585 	printf("%s: XOFF Rcvd = %lld\n", unit,
3586 		(long long)sc->stats.xoffrxc);
3587 	printf("%s: XOFF Xmtd = %lld\n", unit,
3588 		(long long)sc->stats.xofftxc);
3589 
3590 	printf("%s: Good Packets Rcvd = %lld\n", unit,
3591 		(long long)sc->stats.gprc);
3592 	printf("%s: Good Packets Xmtd = %lld\n", unit,
3593 		(long long)sc->stats.gptc);
3594 	printf("%s: Switch Drop Packet Count = %lld\n", unit,
3595 	    (long long)sc->stats.sdpc);
3596 	printf("%s: Management Packets Dropped Count  = %lld\n", unit,
3597 	    (long long)sc->stats.mngptc);
3598 	printf("%s: Management Packets Received Count  = %lld\n", unit,
3599 	    (long long)sc->stats.mngprc);
3600 	printf("%s: Management Packets Transmitted Count  = %lld\n", unit,
3601 	    (long long)sc->stats.mngptc);
3602 	printf("%s: OS2BMC Packets Sent by MC Count  = %lld\n", unit,
3603 	    (long long)sc->stats.b2ospc);
3604 	printf("%s: OS2BMC Packets Received by MC Count  = %lld\n", unit,
3605 	    (long long)sc->stats.o2bgptc);
3606 	printf("%s: OS2BMC Packets Received by Host Count  = %lld\n", unit,
3607 	    (long long)sc->stats.b2ogprc);
3608 	printf("%s: OS2BMC Packets Transmitted by Host Count  = %lld\n", unit,
3609 	    (long long)sc->stats.o2bspc);
3610 	printf("%s: Multicast Packets Received Count  = %lld\n", unit,
3611 	    (long long)sc->stats.mprc);
3612 	printf("%s: Rx Packets to Host Count = %lld\n", unit,
3613 	    (long long)sc->stats.rpthc);
3614 	FOREACH_QUEUE(sc, que) {
3615 		printf("%s: Queue %d Good Packets Received = %d\n", unit,
3616 		    que->me, E1000_READ_REG(&sc->hw, PQGPRC(que->me)));
3617 	}
3618 }
3619 #endif
3620 
3621 int
3622 em_allocate_msix(struct em_softc *sc)
3623 {
3624 	pci_intr_handle_t	 ih;
3625 	const char		*intrstr = NULL;
3626 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3627 	pci_chipset_tag_t	 pc = pa->pa_pc;
3628 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3629 	int			 vec;
3630 
3631 	if (!em_enable_msix)
3632 		return (ENODEV);
3633 
3634 	switch (sc->hw.mac_type) {
3635 	case em_82576:
3636 	case em_82580:
3637 	case em_i350:
3638 	case em_i210:
3639 		break;
3640 	default:
3641 		return (ENODEV);
3642 	}
3643 
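	/* Vector 0 services the single RX/TX queue used by this driver. */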
3644 	vec = 0;
3645 	if (pci_intr_map_msix(pa, vec, &ih))
3646 		return (ENODEV);
3647 	sc->msix = 1;
3648 
3649 	que->me = vec;
3650 	que->eims = 1 << vec;
3651 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3652 
3653 	intrstr = pci_intr_string(pc, ih);
3654 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3655 	    em_queue_intr_msix, que, que->name);
3656 	if (que->tag == NULL) {
3657 		printf(": couldn't establish interrupt");
3658 		if (intrstr != NULL)
3659 			printf(" at %s", intrstr);
3660 		printf("\n");
3661 		return (ENXIO);
3662 	}
3663 
3664 	/* Set up the link vector; use the last queue vector + 1 */
3665 	vec++;
3666 	sc->msix_linkvec = vec;
3667 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3668 		printf(": couldn't map link vector\n");
3669 		return (ENXIO);
3670 	}
3671 
3672 	intrstr = pci_intr_string(pc, ih);
3673 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3674 	    em_link_intr_msix, sc, DEVNAME(sc));
3675 	if (sc->sc_intrhand == NULL) {
3676 		printf(": couldn't establish interrupt");
3677 		if (intrstr != NULL)
3678 			printf(" at %s", intrstr);
3679 		printf("\n");
3680 		return (ENXIO);
3681 	}
3682 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3683 
3684 	return (0);
3685 }
3686 
3687 /*
3688  * Interrupt for a specific queue (not link interrupts). The EICR bit which
3689  * maps to the EIMS bit covers both RX and TX, therefore we can't
3690  * distinguish whether this is an RX or a TX completion and must handle both.
3691  * The bits in EICR are autocleared and we _cannot_ read EICR.
3692  */
3693 int
3694 em_queue_intr_msix(void *vque)
3695 {
3696 	struct em_queue *que = vque;
3697 	struct em_softc *sc = que->sc;
3698 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3699 
3700 	if (ifp->if_flags & IFF_RUNNING) {
3701 		em_txeof(que);
3702 		if (em_rxeof(que))
3703 			em_rxrefill(que);
3704 	}
3705 
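	/* Re-arm this queue's vector by setting its bit in EIMS again. */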
3706 	em_enable_queue_intr_msix(que);
3707 
3708 	return (1);
3709 }
3710 
3711 int
3712 em_link_intr_msix(void *arg)
3713 {
3714 	struct em_softc *sc = arg;
3715 	uint32_t icr;
3716 
3717 	icr = E1000_READ_REG(&sc->hw, ICR);
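	/* Reading ICR acknowledges (clears) the pending interrupt causes. */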
3718 
3719 	/* Link status change */
3720 	if (icr & E1000_ICR_LSC) {
3721 		KERNEL_LOCK();
3722 		sc->hw.get_link_status = 1;
3723 		em_check_for_link(&sc->hw);
3724 		em_update_link_status(sc);
3725 		KERNEL_UNLOCK();
3726 	}
3727 
3728 	/* Re-arm unconditionally */
3729 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3730 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3731 
3732 	return (1);
3733 }
3734 
3735 /*
3736  * Maps queues into MSI-X interrupt vectors.
3737  */
3738 int
3739 em_setup_queues_msix(struct em_softc *sc)
3740 {
3741 	uint32_t ivar, newitr, index;
3742 	struct em_queue *que;
3743 
3744 	KASSERT(sc->msix);
3745 
3746 	/* First enable MSI-X mode in GPIE (skipped on the 82575) */
3747 	if (sc->hw.mac_type != em_82575)
3748 		E1000_WRITE_REG(&sc->hw, GPIE,
3749 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3750 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3751 
3752 	/* Map queue and link interrupts into MSI-X vectors via the IVARs */
3753 	switch (sc->hw.mac_type) {
3754 	case em_82580:
3755 	case em_i350:
3756 	case em_i210:
3757 		/* RX entries */
3758 		/*
3759 		 * Note, this maps queues into MSI-X vectors; it works fine.
3760 		 * The funky offset calculation and the check whether que->me
3761 		 * is odd are due to the weird layout of the IVAR registers;
3762 		 * the datasheet explains it well.
3763 		 */
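		/*
		 * As an illustration of that layout: each 32-bit IVAR holds
		 * the entries for a pair of queues, RX in the low byte of
		 * each half and TX in the high byte:
		 *
		 *	bits 31..24  23..16  15..8    7..0
		 *	     TX odd  RX odd  TX even  RX even
		 */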
3764 		FOREACH_QUEUE(sc, que) {
3765 			index = que->me >> 1;
3766 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3767 			if (que->me & 1) {
3768 				ivar &= 0xFF00FFFF;
3769 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3770 			} else {
3771 				ivar &= 0xFFFFFF00;
3772 				ivar |= que->me | E1000_IVAR_VALID;
3773 			}
3774 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3775 		}
3776 
3777 		/* TX entries */
3778 		FOREACH_QUEUE(sc, que) {
3779 			index = que->me >> 1;
3780 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3781 			if (que->me & 1) {
3782 				ivar &= 0x00FFFFFF;
3783 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3784 			} else {
3785 				ivar &= 0xFFFF00FF;
3786 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3787 			}
3788 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3789 			sc->msix_queuesmask |= que->eims;
3790 		}
3791 
3792 		/* And for the link interrupt */
3793 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3794 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3795 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3796 		break;
3797 	case em_82576:
3798 		/* RX entries */
3799 		FOREACH_QUEUE(sc, que) {
3800 			index = que->me & 0x7; /* Each IVAR has two entries */
3801 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3802 			if (que->me < 8) {
3803 				ivar &= 0xFFFFFF00;
3804 				ivar |= que->me | E1000_IVAR_VALID;
3805 			} else {
3806 				ivar &= 0xFF00FFFF;
3807 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3808 			}
3809 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3810 			sc->msix_queuesmask |= que->eims;
3811 		}
3812 		/* TX entries */
3813 		FOREACH_QUEUE(sc, que) {
3814 			index = que->me & 0x7; /* Each IVAR has two entries */
3815 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3816 			if (que->me < 8) {
3817 				ivar &= 0xFFFF00FF;
3818 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3819 			} else {
3820 				ivar &= 0x00FFFFFF;
3821 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3822 			}
3823 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3824 			sc->msix_queuesmask |= que->eims;
3825 		}
3826 
3827 		/* And for the link interrupt */
3828 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3829 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3830 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3831 		break;
3832 	default:
3833 		panic("unsupported mac");
3834 		break;
3835 	}
3836 
3837 	/* Set the starting interrupt rate */
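	/*
	 * EITR is programmed with an interval rather than a rate, so a
	 * larger MAX_INTS_PER_SEC yields a smaller value; the low bits are
	 * masked off so that only the interval field of the register is set.
	 */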
3838 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
3839 
3840 	if (sc->hw.mac_type == em_82575)
3841 		newitr |= newitr << 16;
3842 	else
3843 		newitr |= E1000_EITR_CNT_IGNR;
3844 
3845 	FOREACH_QUEUE(sc, que)
3846 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
3847 
3848 	return (0);
3849 }
3850 
3851 void
3852 em_enable_queue_intr_msix(struct em_queue *que)
3853 {
3854 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
3855 }
3856 #endif /* !SMALL_KERNEL */
3857 
3858 int
3859 em_allocate_desc_rings(struct em_softc *sc)
3860 {
3861 	struct em_queue *que;
3862 
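	/* Allocate DMA memory for each queue's TX and RX descriptor rings. */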
3863 	FOREACH_QUEUE(sc, que) {
3864 		/* Allocate Transmit Descriptor ring */
3865 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
3866 		    &que->tx.sc_tx_dma) != 0) {
3867 			printf("%s: Unable to allocate tx_desc memory\n",
3868 			    DEVNAME(sc));
3869 			return (ENOMEM);
3870 		}
3871 		que->tx.sc_tx_desc_ring =
3872 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
3873 
3874 		/* Allocate Receive Descriptor ring */
3875 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
3876 		    &que->rx.sc_rx_dma) != 0) {
3877 			printf("%s: Unable to allocate rx_desc memory\n",
3878 			    DEVNAME(sc));
3879 			return (ENOMEM);
3880 		}
3881 		que->rx.sc_rx_desc_ring =
3882 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
3883 	}
3884 
3885 	return (0);
3886 }
3887