xref: /openbsd-src/sys/dev/pci/if_em.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.358 2021/01/24 10:21:43 jsg Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 /*********************************************************************
41  *  Driver version
42  *********************************************************************/
43 
44 #define EM_DRIVER_VERSION	"6.2.9"
45 
46 /*********************************************************************
47  *  PCI Device ID Table
48  *********************************************************************/
49 const struct pci_matchid em_devices[] = {
50 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
225 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
226 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
227 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
228 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
229 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
230 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
231 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
232 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
233 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
234 };
235 
236 /*********************************************************************
237  *  Function prototypes
238  *********************************************************************/
239 int  em_probe(struct device *, void *, void *);
240 void em_attach(struct device *, struct device *, void *);
241 void em_defer_attach(struct device*);
242 int  em_detach(struct device *, int);
243 int  em_activate(struct device *, int);
244 int  em_intr(void *);
245 int  em_allocate_legacy(struct em_softc *);
246 void em_start(struct ifqueue *);
247 int  em_ioctl(struct ifnet *, u_long, caddr_t);
248 void em_watchdog(struct ifnet *);
249 void em_init(void *);
250 void em_stop(void *, int);
251 void em_media_status(struct ifnet *, struct ifmediareq *);
252 int  em_media_change(struct ifnet *);
253 uint64_t  em_flowstatus(struct em_softc *);
254 void em_identify_hardware(struct em_softc *);
255 int  em_allocate_pci_resources(struct em_softc *);
256 void em_free_pci_resources(struct em_softc *);
257 void em_local_timer(void *);
258 int  em_hardware_init(struct em_softc *);
259 void em_setup_interface(struct em_softc *);
260 int  em_setup_transmit_structures(struct em_softc *);
261 void em_initialize_transmit_unit(struct em_softc *);
262 int  em_setup_receive_structures(struct em_softc *);
263 void em_initialize_receive_unit(struct em_softc *);
264 void em_enable_intr(struct em_softc *);
265 void em_disable_intr(struct em_softc *);
266 void em_free_transmit_structures(struct em_softc *);
267 void em_free_receive_structures(struct em_softc *);
268 void em_update_stats_counters(struct em_softc *);
269 void em_disable_aspm(struct em_softc *);
270 void em_txeof(struct em_queue *);
271 int  em_allocate_receive_structures(struct em_softc *);
272 int  em_allocate_transmit_structures(struct em_softc *);
273 int  em_allocate_desc_rings(struct em_softc *);
274 int  em_rxfill(struct em_queue *);
275 void em_rxrefill(void *);
276 int  em_rxeof(struct em_queue *);
277 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
278 			 struct mbuf *);
279 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
280 	    u_int32_t *, u_int32_t *);
281 void em_iff(struct em_softc *);
282 void em_update_link_status(struct em_softc *);
283 int  em_get_buf(struct em_queue *, int);
284 void em_enable_hw_vlans(struct em_softc *);
285 u_int em_encap(struct em_queue *, struct mbuf *);
286 void em_smartspeed(struct em_softc *);
287 int  em_82547_fifo_workaround(struct em_softc *, int);
288 void em_82547_update_fifo_head(struct em_softc *, int);
289 int  em_82547_tx_fifo_reset(struct em_softc *);
290 void em_82547_move_tail(void *arg);
291 void em_82547_move_tail_locked(struct em_softc *);
292 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
293 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
294 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
295 			      PDESC_ARRAY desc_array);
296 void em_flush_tx_ring(struct em_queue *);
297 void em_flush_rx_ring(struct em_queue *);
298 void em_flush_desc_rings(struct em_softc *);
299 
300 #ifndef SMALL_KERNEL
301 /* MSIX/Multiqueue functions */
302 int  em_allocate_msix(struct em_softc *);
303 int  em_setup_queues_msix(struct em_softc *);
304 int  em_queue_intr_msix(void *);
305 int  em_link_intr_msix(void *);
306 void em_enable_queue_intr_msix(struct em_queue *);
307 #else
308 #define em_allocate_msix(_sc) 	(-1)
309 #endif
310 
311 #if NKSTAT > 0
312 void	em_kstat_attach(struct em_softc *);
313 int	em_kstat_read(struct kstat *);
314 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
315 #endif
316 
317 /*********************************************************************
318  *  OpenBSD Device Interface Entry Points
319  *********************************************************************/
320 
321 struct cfattach em_ca = {
322 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
323 	em_activate
324 };
325 
326 struct cfdriver em_cd = {
327 	NULL, "em", DV_IFNET
328 };
329 
330 static int em_smart_pwr_down = FALSE;
331 int em_enable_msix = 0;
332 
333 /*********************************************************************
334  *  Device identification routine
335  *
336  *  em_probe determines whether the driver should be loaded on the
337  *  adapter, based on the PCI vendor/device ID of the adapter.
338  *
339  *  return 0 on no match, positive on match
340  *********************************************************************/
341 
342 int
343 em_probe(struct device *parent, void *match, void *aux)
344 {
345 	INIT_DEBUGOUT("em_probe: begin");
346 
347 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
348 	    nitems(em_devices)));
349 }
350 
351 void
352 em_defer_attach(struct device *self)
353 {
354 	struct em_softc *sc = (struct em_softc *)self;
355 	struct pci_attach_args *pa = &sc->osdep.em_pa;
356 	pci_chipset_tag_t	pc = pa->pa_pc;
357 	void *gcu;
358 
359 	INIT_DEBUGOUT("em_defer_attach: begin");
360 
361 	if ((gcu = em_lookup_gcu(self)) == 0) {
362 		printf("%s: No GCU found, deferred attachment failed\n",
363 		    DEVNAME(sc));
364 
365 		if (sc->sc_intrhand)
366 			pci_intr_disestablish(pc, sc->sc_intrhand);
367 		sc->sc_intrhand = 0;
368 
369 		em_stop(sc, 1);
370 
371 		em_free_pci_resources(sc);
372 
373 		return;
374 	}
375 
376 	sc->hw.gcu = gcu;
377 
378 	em_attach_miibus(self);
379 
380 	em_setup_interface(sc);
381 
382 	em_setup_link(&sc->hw);
383 
384 	em_update_link_status(sc);
385 }
386 
387 /*********************************************************************
388  *  Device initialization routine
389  *
390  *  The attach entry point is called when the driver is being loaded.
391  *  This routine identifies the type of hardware, allocates all resources
392  *  and initializes the hardware.
393  *
394  *********************************************************************/
395 
396 void
397 em_attach(struct device *parent, struct device *self, void *aux)
398 {
399 	struct pci_attach_args *pa = aux;
400 	struct em_softc *sc;
401 	int defer = 0;
402 
403 	INIT_DEBUGOUT("em_attach: begin");
404 
405 	sc = (struct em_softc *)self;
406 	sc->sc_dmat = pa->pa_dmat;
407 	sc->osdep.em_pa = *pa;
408 
409 	timeout_set(&sc->timer_handle, em_local_timer, sc);
410 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
411 
412 	/* Determine hardware revision */
413 	em_identify_hardware(sc);
414 
415 	/*
416 	 * Only use MSI on the newer PCIe parts, with the exception
417 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
418 	 */
419 	if (sc->hw.mac_type <= em_82572)
420 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
421 
422 	/* Parameters (to be read from user) */
423 	if (sc->hw.mac_type >= em_82544) {
424 		sc->sc_tx_slots = EM_MAX_TXD;
425 		sc->sc_rx_slots = EM_MAX_RXD;
426 	} else {
427 		sc->sc_tx_slots = EM_MAX_TXD_82543;
428 		sc->sc_rx_slots = EM_MAX_RXD_82543;
429 	}
430 	sc->tx_int_delay = EM_TIDV;
431 	sc->tx_abs_int_delay = EM_TADV;
432 	sc->rx_int_delay = EM_RDTR;
433 	sc->rx_abs_int_delay = EM_RADV;
434 	sc->hw.autoneg = DO_AUTO_NEG;
435 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
436 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
437 	sc->hw.tbi_compatibility_en = TRUE;
438 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
439 
440 	sc->hw.phy_init_script = 1;
441 	sc->hw.phy_reset_disable = FALSE;
442 
443 #ifndef EM_MASTER_SLAVE
444 	sc->hw.master_slave = em_ms_hw_default;
445 #else
446 	sc->hw.master_slave = EM_MASTER_SLAVE;
447 #endif
448 
449 	/*
450 	 * This controls when hardware reports transmit completion
451 	 * status.
452 	 */
453 	sc->hw.report_tx_early = 1;
454 
455 	if (em_allocate_pci_resources(sc))
456 		goto err_pci;
457 
458 	/* Initialize eeprom parameters */
459 	em_init_eeprom_params(&sc->hw);
460 
461 	/*
462 	 * Set the max frame size assuming standard Ethernet
463 	 * sized frames.
464 	 */
465 	switch (sc->hw.mac_type) {
466 		case em_82573:
467 		{
468 			uint16_t	eeprom_data = 0;
469 
470 			/*
471 			 * 82573 only supports Jumbo frames
472 			 * if ASPM is disabled.
473 			 */
474 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
475 			    1, &eeprom_data);
476 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
477 				sc->hw.max_frame_size = ETHER_MAX_LEN;
478 				break;
479 			}
480 			/* Allow Jumbo frames */
481 			/* FALLTHROUGH */
482 		}
483 		case em_82571:
484 		case em_82572:
485 		case em_82574:
486 		case em_82575:
487 		case em_82576:
488 		case em_82580:
489 		case em_i210:
490 		case em_i350:
491 		case em_ich9lan:
492 		case em_ich10lan:
493 		case em_pch2lan:
494 		case em_pch_lpt:
495 		case em_pch_spt:
496 		case em_pch_cnp:
497 		case em_80003es2lan:
498 			/* 9K Jumbo Frame size */
499 			sc->hw.max_frame_size = 9234;
500 			break;
501 		case em_pchlan:
502 			sc->hw.max_frame_size = 4096;
503 			break;
504 		case em_82542_rev2_0:
505 		case em_82542_rev2_1:
506 		case em_ich8lan:
507 			/* Adapters that do not support Jumbo frames */
508 			sc->hw.max_frame_size = ETHER_MAX_LEN;
509 			break;
510 		default:
511 			sc->hw.max_frame_size =
512 			    MAX_JUMBO_FRAME_SIZE;
513 	}
514 
515 	sc->hw.min_frame_size =
516 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
517 
518 	if (em_allocate_desc_rings(sc) != 0) {
519 		printf("%s: Unable to allocate descriptor ring memory\n",
520 		    DEVNAME(sc));
521 		goto err_pci;
522 	}
523 
524 	/* Initialize the hardware */
525 	if ((defer = em_hardware_init(sc))) {
526 		if (defer == EAGAIN)
527 			config_defer(self, em_defer_attach);
528 		else {
529 			printf("%s: Unable to initialize the hardware\n",
530 			    DEVNAME(sc));
531 			goto err_pci;
532 		}
533 	}
534 
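	/*
	 * Multi-port/multi-function controllers share PHY access through
	 * a software/firmware semaphore, so note which PCI function this
	 * is and select the matching per-function semaphore bit.
	 */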
535 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
536 	    sc->hw.mac_type == em_82576 ||
537 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
538 	    sc->hw.mac_type == em_i350) {
539 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
540 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
541 		    E1000_STATUS_FUNC_SHIFT;
542 
543 		switch (sc->hw.bus_func) {
544 		case 0:
545 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
546 			break;
547 		case 1:
548 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
549 			break;
550 		case 2:
551 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
552 			break;
553 		case 3:
554 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
555 			break;
556 		}
557 	} else {
558 		sc->hw.bus_func = 0;
559 	}
560 
561 	/* Copy the permanent MAC address out of the EEPROM */
562 	if (em_read_mac_addr(&sc->hw) < 0) {
563 		printf("%s: EEPROM read error while reading mac address\n",
564 		       DEVNAME(sc));
565 		goto err_pci;
566 	}
567 
568 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
569 
570 	/* Setup OS specific network interface */
571 	if (!defer)
572 		em_setup_interface(sc);
573 
574 	/* Initialize statistics */
575 	em_clear_hw_cntrs(&sc->hw);
576 #if NKSTAT > 0
577 	em_kstat_attach(sc);
578 #endif
579 	sc->hw.get_link_status = 1;
580 	if (!defer)
581 		em_update_link_status(sc);
582 
583 #ifdef EM_DEBUG
584 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
585 #endif
586 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
587 
588 	/* Indicate SOL/IDER usage */
589 	if (em_check_phy_reset_block(&sc->hw))
590 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
591 		    DEVNAME(sc));
592 
593 	/* Identify 82544 on PCI-X */
594 	em_get_bus_info(&sc->hw);
595 	if (sc->hw.bus_type == em_bus_type_pcix &&
596 	    sc->hw.mac_type == em_82544)
597 		sc->pcix_82544 = TRUE;
598 	else
599 		sc->pcix_82544 = FALSE;
600 
601 	sc->hw.icp_xxxx_is_link_up = FALSE;
602 
603 	INIT_DEBUGOUT("em_attach: end");
604 	return;
605 
606 err_pci:
607 	em_free_pci_resources(sc);
608 }
609 
610 /*********************************************************************
611  *  Transmit entry point
612  *
613  *  em_start is called by the stack to initiate a transmit.
614  *  The driver will remain in this routine as long as there are
615  *  packets to transmit and transmit resources are available.
616  *  If resources are not available, the stack is notified and
617  *  the packet is requeued.
618  **********************************************************************/
619 
620 void
621 em_start(struct ifqueue *ifq)
622 {
623 	struct ifnet *ifp = ifq->ifq_if;
624 	struct em_softc *sc = ifp->if_softc;
625 	u_int head, free, used;
626 	struct mbuf *m;
627 	int post = 0;
628 	struct em_queue *que = sc->queues; /* Use only first queue. */
629 
630 	if (!sc->link_active) {
631 		ifq_purge(ifq);
632 		return;
633 	}
634 
635 	/* calculate free space */
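	/*
	 * The descriptor ring is circular: sc_tx_desc_head is the next
	 * slot to fill and sc_tx_desc_tail the next slot to be cleaned
	 * by em_txeof(), so when the tail is at or behind the head the
	 * free region wraps past the end of the ring.
	 */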
636 	head = que->tx.sc_tx_desc_head;
637 	free = que->tx.sc_tx_desc_tail;
638 	if (free <= head)
639 		free += sc->sc_tx_slots;
640 	free -= head;
641 
642 	if (sc->hw.mac_type != em_82547) {
643 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
644 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
645 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
646 	}
647 
648 	for (;;) {
649 		/* use 2 because cksum setup can use an extra slot */
650 		if (EM_MAX_SCATTER + 2 > free) {
651 			ifq_set_oactive(ifq);
652 			break;
653 		}
654 
655 		m = ifq_dequeue(ifq);
656 		if (m == NULL)
657 			break;
658 
659 		used = em_encap(que, m);
660 		if (used == 0) {
661 			m_freem(m);
662 			continue;
663 		}
664 
665 		KASSERT(used <= free);
666 
667 		free -= used;
668 
669 #if NBPFILTER > 0
670 		/* Send a copy of the frame to the BPF listener */
671 		if (ifp->if_bpf)
672 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
673 #endif
674 
675 		/* Set timeout in case hardware has problems transmitting */
676 		ifp->if_timer = EM_TX_TIMEOUT;
677 
678 		if (sc->hw.mac_type == em_82547) {
679 			int len = m->m_pkthdr.len;
680 
681 			if (sc->link_duplex == HALF_DUPLEX)
682 				em_82547_move_tail_locked(sc);
683 			else {
684 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
685 				    que->tx.sc_tx_desc_head);
686 				em_82547_update_fifo_head(sc, len);
687 			}
688 		}
689 
690 		post = 1;
691 	}
692 
693 	if (sc->hw.mac_type != em_82547) {
694 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
695 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
696 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
697 		/*
698 		 * Advance the Transmit Descriptor Tail (TDT);
699 		 * this tells the E1000 that this frame is
700 		 * available to transmit.
701 		 */
702 		if (post)
703 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
704 			    que->tx.sc_tx_desc_head);
705 	}
706 }
707 
708 /*********************************************************************
709  *  Ioctl entry point
710  *
711  *  em_ioctl is called when the user wants to configure the
712  *  interface.
713  *
714  *  return 0 on success, positive on failure
715  **********************************************************************/
716 
717 int
718 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
719 {
720 	int		error = 0;
721 	struct ifreq   *ifr = (struct ifreq *) data;
722 	struct em_softc *sc = ifp->if_softc;
723 	int s;
724 
725 	s = splnet();
726 
727 	switch (command) {
728 	case SIOCSIFADDR:
729 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
730 			       "Addr)");
731 		if (!(ifp->if_flags & IFF_UP)) {
732 			ifp->if_flags |= IFF_UP;
733 			em_init(sc);
734 		}
735 		break;
736 
737 	case SIOCSIFFLAGS:
738 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
739 		if (ifp->if_flags & IFF_UP) {
740 			if (ifp->if_flags & IFF_RUNNING)
741 				error = ENETRESET;
742 			else
743 				em_init(sc);
744 		} else {
745 			if (ifp->if_flags & IFF_RUNNING)
746 				em_stop(sc, 0);
747 		}
748 		break;
749 
750 	case SIOCSIFMEDIA:
751 		/* Check SOL/IDER usage */
752 		if (em_check_phy_reset_block(&sc->hw)) {
753 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
754 			    DEVNAME(sc));
755 			break;
756 		}
757 	case SIOCGIFMEDIA:
758 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
759 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
760 		break;
761 
762 	case SIOCGIFRXR:
763 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
764 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
765 		break;
766 
767 	default:
768 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
769 	}
770 
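	/*
	 * ENETRESET means the interface is already running and only the
	 * receive filter (promiscuous/multicast state) needs to be
	 * reprogrammed, so avoid a full reinitialization.
	 */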
771 	if (error == ENETRESET) {
772 		if (ifp->if_flags & IFF_RUNNING) {
773 			em_disable_intr(sc);
774 			em_iff(sc);
775 			if (sc->hw.mac_type == em_82542_rev2_0)
776 				em_initialize_receive_unit(sc);
777 			em_enable_intr(sc);
778 		}
779 		error = 0;
780 	}
781 
782 	splx(s);
783 	return (error);
784 }
785 
786 /*********************************************************************
787  *  Watchdog entry point
788  *
789  *  This routine is called whenever hardware quits transmitting.
790  *
791  **********************************************************************/
792 
793 void
794 em_watchdog(struct ifnet *ifp)
795 {
796 	struct em_softc *sc = ifp->if_softc;
797 	struct em_queue *que = sc->queues; /* Use only first queue. */
798 
799 
800 	/* If we are in this routine because of pause frames, then
801 	 * don't reset the hardware.
802 	 */
803 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
804 		ifp->if_timer = EM_TX_TIMEOUT;
805 		return;
806 	}
807 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
808 	    DEVNAME(sc),
809 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
810 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
811 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
812 
813 	em_init(sc);
814 
815 	sc->watchdog_events++;
816 }
817 
818 /*********************************************************************
819  *  Init entry point
820  *
821  *  This routine is used in two ways. It is used by the stack as
822  *  init entry point in network interface structure. It is also used
823  *  by the driver as a hw/sw initialization routine to get to a
824  *  consistent state.
825  *
826  **********************************************************************/
827 
828 void
829 em_init(void *arg)
830 {
831 	struct em_softc *sc = arg;
832 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
833 	uint32_t	pba;
834 	int s;
835 
836 	s = splnet();
837 
838 	INIT_DEBUGOUT("em_init: begin");
839 
840 	em_stop(sc, 0);
841 
842 	/*
843 	 * Packet Buffer Allocation (PBA)
844 	 * Writing PBA sets the receive portion of the buffer;
845 	 * the remainder is used for the transmit buffer.
846 	 *
847 	 * Devices before the 82547 had a Packet Buffer of 64K.
848 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
849 	 * After the 82547 the buffer was reduced to 40K.
850 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
851 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
852 	 */
853 	switch (sc->hw.mac_type) {
854 	case em_82547:
855 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
856 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
857 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
858 		else
859 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
860 		sc->tx_fifo_head = 0;
861 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
862 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
863 		break;
864 	case em_82571:
865 	case em_82572: /* Total Packet Buffer on these is 48k */
866 	case em_82575:
867 	case em_82576:
868 	case em_82580:
869 	case em_80003es2lan:
870 	case em_i350:
871 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
872 		break;
873 	case em_i210:
874 		pba = E1000_PBA_34K;
875 		break;
876 	case em_82573: /* 82573: Total Packet Buffer is 32K */
877 		/* Jumbo frames not supported */
878 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
879 		break;
880 	case em_82574: /* Total Packet Buffer is 40k */
881 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
882 		break;
883 	case em_ich8lan:
884 		pba = E1000_PBA_8K;
885 		break;
886 	case em_ich9lan:
887 	case em_ich10lan:
888 		/* Boost Receive side for jumbo frames */
889 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
890 			pba = E1000_PBA_14K;
891 		else
892 			pba = E1000_PBA_10K;
893 		break;
894 	case em_pchlan:
895 	case em_pch2lan:
896 	case em_pch_lpt:
897 	case em_pch_spt:
898 	case em_pch_cnp:
899 		pba = E1000_PBA_26K;
900 		break;
901 	default:
902 		/* Devices before 82547 had a Packet Buffer of 64K. */
903 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
904 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
905 		else
906 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
907 	}
908 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
909 	E1000_WRITE_REG(&sc->hw, PBA, pba);
910 
911 	/* Get the latest mac address, User can use a LAA */
912 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
913 
914 	/* Initialize the hardware */
915 	if (em_hardware_init(sc)) {
916 		printf("%s: Unable to initialize the hardware\n",
917 		       DEVNAME(sc));
918 		splx(s);
919 		return;
920 	}
921 	em_update_link_status(sc);
922 
923 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
924 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
925 		em_enable_hw_vlans(sc);
926 
927 	/* Prepare transmit descriptors and buffers */
928 	if (em_setup_transmit_structures(sc)) {
929 		printf("%s: Could not setup transmit structures\n",
930 		       DEVNAME(sc));
931 		em_stop(sc, 0);
932 		splx(s);
933 		return;
934 	}
935 	em_initialize_transmit_unit(sc);
936 
937 	/* Prepare receive descriptors and buffers */
938 	if (em_setup_receive_structures(sc)) {
939 		printf("%s: Could not setup receive structures\n",
940 		       DEVNAME(sc));
941 		em_stop(sc, 0);
942 		splx(s);
943 		return;
944 	}
945 	em_initialize_receive_unit(sc);
946 
947 #ifndef SMALL_KERNEL
948 	if (sc->msix) {
949 		if (em_setup_queues_msix(sc)) {
950 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
951 			splx(s);
952 			return;
953 		}
954 	}
955 #endif
956 
957 	/* Program promiscuous mode and multicast filters. */
958 	em_iff(sc);
959 
960 	ifp->if_flags |= IFF_RUNNING;
961 	ifq_clr_oactive(&ifp->if_snd);
962 
963 	timeout_add_sec(&sc->timer_handle, 1);
964 	em_clear_hw_cntrs(&sc->hw);
965 	em_enable_intr(sc);
966 
967 	/* Don't reset the phy next time init gets called */
968 	sc->hw.phy_reset_disable = TRUE;
969 
970 	splx(s);
971 }
972 
973 /*********************************************************************
974  *
975  *  Interrupt Service routine
976  *
977  **********************************************************************/
978 int
979 em_intr(void *arg)
980 {
981 	struct em_softc	*sc = arg;
982 	struct em_queue *que = sc->queues; /* single queue */
983 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
984 	u_int32_t	reg_icr, test_icr;
985 
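	/*
	 * Reading ICR acknowledges the pending interrupt causes.  On
	 * 82571 and later parts the INT_ASSERTED bit indicates whether
	 * this device raised the interrupt at all, which lets us return
	 * early when a shared interrupt line was triggered by another
	 * device.
	 */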
986 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
987 	if (sc->hw.mac_type >= em_82571)
988 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
989 	if (!test_icr)
990 		return (0);
991 
992 	if (ifp->if_flags & IFF_RUNNING) {
993 		em_txeof(que);
994 		if (em_rxeof(que))
995 			em_rxrefill(que);
996 	}
997 
998 	/* Link status change */
999 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1000 		KERNEL_LOCK();
1001 		sc->hw.get_link_status = 1;
1002 		em_check_for_link(&sc->hw);
1003 		em_update_link_status(sc);
1004 		KERNEL_UNLOCK();
1005 	}
1006 
1007 	return (1);
1008 }
1009 
1010 /*********************************************************************
1011  *
1012  *  Media Ioctl callback
1013  *
1014  *  This routine is called whenever the user queries the status of
1015  *  the interface using ifconfig.
1016  *
1017  **********************************************************************/
1018 void
1019 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1020 {
1021 	struct em_softc *sc = ifp->if_softc;
1022 	uint64_t fiber_type = IFM_1000_SX;
1023 	u_int16_t gsr;
1024 
1025 	INIT_DEBUGOUT("em_media_status: begin");
1026 
1027 	em_check_for_link(&sc->hw);
1028 	em_update_link_status(sc);
1029 
1030 	ifmr->ifm_status = IFM_AVALID;
1031 	ifmr->ifm_active = IFM_ETHER;
1032 
1033 	if (!sc->link_active) {
1034 		ifmr->ifm_active |= IFM_NONE;
1035 		return;
1036 	}
1037 
1038 	ifmr->ifm_status |= IFM_ACTIVE;
1039 
1040 	if (sc->hw.media_type == em_media_type_fiber ||
1041 	    sc->hw.media_type == em_media_type_internal_serdes) {
1042 		if (sc->hw.mac_type == em_82545)
1043 			fiber_type = IFM_1000_LX;
1044 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1045 	} else {
1046 		switch (sc->link_speed) {
1047 		case 10:
1048 			ifmr->ifm_active |= IFM_10_T;
1049 			break;
1050 		case 100:
1051 			ifmr->ifm_active |= IFM_100_TX;
1052 			break;
1053 		case 1000:
1054 			ifmr->ifm_active |= IFM_1000_T;
1055 			break;
1056 		}
1057 
1058 		if (sc->link_duplex == FULL_DUPLEX)
1059 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1060 		else
1061 			ifmr->ifm_active |= IFM_HDX;
1062 
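		/*
		 * For 1000baseT links, check the PHY's 1000BASE-T status
		 * register and report whether master/slave resolution
		 * selected the master role.
		 */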
1063 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1064 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1065 			if (gsr & SR_1000T_MS_CONFIG_RES)
1066 				ifmr->ifm_active |= IFM_ETH_MASTER;
1067 		}
1068 	}
1069 }
1070 
1071 /*********************************************************************
1072  *
1073  *  Media Ioctl callback
1074  *
1075  *  This routine is called when the user changes speed/duplex using
1076  *  media/mediaopt option with ifconfig.
1077  *
1078  **********************************************************************/
1079 int
1080 em_media_change(struct ifnet *ifp)
1081 {
1082 	struct em_softc *sc = ifp->if_softc;
1083 	struct ifmedia	*ifm = &sc->media;
1084 
1085 	INIT_DEBUGOUT("em_media_change: begin");
1086 
1087 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1088 		return (EINVAL);
1089 
1090 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1091 	case IFM_AUTO:
1092 		sc->hw.autoneg = DO_AUTO_NEG;
1093 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1094 		break;
1095 	case IFM_1000_LX:
1096 	case IFM_1000_SX:
1097 	case IFM_1000_T:
1098 		sc->hw.autoneg = DO_AUTO_NEG;
1099 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1100 		break;
1101 	case IFM_100_TX:
1102 		sc->hw.autoneg = FALSE;
1103 		sc->hw.autoneg_advertised = 0;
1104 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1105 			sc->hw.forced_speed_duplex = em_100_full;
1106 		else
1107 			sc->hw.forced_speed_duplex = em_100_half;
1108 		break;
1109 	case IFM_10_T:
1110 		sc->hw.autoneg = FALSE;
1111 		sc->hw.autoneg_advertised = 0;
1112 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1113 			sc->hw.forced_speed_duplex = em_10_full;
1114 		else
1115 			sc->hw.forced_speed_duplex = em_10_half;
1116 		break;
1117 	default:
1118 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1119 	}
1120 
1121 	/*
1122 	 * As the speed/duplex settings may have changed, we need to
1123 	 * reset the PHY.
1124 	 */
1125 	sc->hw.phy_reset_disable = FALSE;
1126 
1127 	em_init(sc);
1128 
1129 	return (0);
1130 }
1131 
1132 uint64_t
1133 em_flowstatus(struct em_softc *sc)
1134 {
1135 	u_int16_t ar, lpar;
1136 
1137 	if (sc->hw.media_type == em_media_type_fiber ||
1138 	    sc->hw.media_type == em_media_type_internal_serdes)
1139 		return (0);
1140 
1141 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1142 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1143 
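	/*
	 * Resolve the negotiated flow control pairing from our
	 * advertisement and the link partner's ability: symmetric pause
	 * enables both directions, otherwise the asymmetric-direction
	 * bits decide whether only transmit or only receive of pause
	 * frames was agreed on.
	 */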
1144 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1145 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1146 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1147 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1148 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1149 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1150 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1151 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1152 
1153 	return (0);
1154 }
1155 
1156 /*********************************************************************
1157  *
1158  *  This routine maps the mbufs to tx descriptors.
1159  *
1160  *  return the number of descriptors used, or 0 on failure
1161  **********************************************************************/
1162 u_int
1163 em_encap(struct em_queue *que, struct mbuf *m)
1164 {
1165 	struct em_softc *sc = que->sc;
1166 	struct em_packet *pkt;
1167 	struct em_tx_desc *desc;
1168 	bus_dmamap_t map;
1169 	u_int32_t txd_upper, txd_lower;
1170 	u_int head, last, used = 0;
1171 	int i, j;
1172 
1173 	/* For 82544 Workaround */
1174 	DESC_ARRAY		desc_array;
1175 	u_int32_t		array_elements;
1176 
1177 	/* get a dmamap for this packet from the next free slot */
1178 	head = que->tx.sc_tx_desc_head;
1179 	pkt = &que->tx.sc_tx_pkts_ring[head];
1180 	map = pkt->pkt_map;
1181 
1182 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1183 	case 0:
1184 		break;
1185 	case EFBIG:
1186 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1187 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1188 		     BUS_DMA_NOWAIT) == 0)
1189 			break;
1190 
1191 		/* FALLTHROUGH */
1192 	default:
1193 		sc->no_tx_dma_setup++;
1194 		return (0);
1195 	}
1196 
1197 	bus_dmamap_sync(sc->sc_dmat, map,
1198 	    0, map->dm_mapsize,
1199 	    BUS_DMASYNC_PREWRITE);
1200 
1201 	if (sc->hw.mac_type == em_82547) {
1202 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1203 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1204 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1205 	}
1206 
1207 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1208 	    sc->hw.mac_type != em_82576 &&
1209 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1210 	    sc->hw.mac_type != em_i350) {
1211 		used += em_transmit_checksum_setup(que, m, head,
1212 		    &txd_upper, &txd_lower);
1213 	} else {
1214 		txd_upper = txd_lower = 0;
1215 	}
1216 
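	/*
	 * The checksum setup above may have queued a context descriptor;
	 * advance our local head index past any slots it consumed before
	 * filling in the data descriptors.
	 */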
1217 	head += used;
1218 	if (head >= sc->sc_tx_slots)
1219 		head -= sc->sc_tx_slots;
1220 
1221 	for (i = 0; i < map->dm_nsegs; i++) {
1222 		/* If sc is 82544 and on PCI-X bus */
1223 		if (sc->pcix_82544) {
1224 			/*
1225 			 * Check the Address and Length combination and
1226 			 * split the data accordingly
1227 			 */
1228 			array_elements = em_fill_descriptors(
1229 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1230 			    &desc_array);
1231 			for (j = 0; j < array_elements; j++) {
1232 				desc = &que->tx.sc_tx_desc_ring[head];
1233 
1234 				desc->buffer_addr = htole64(
1235 					desc_array.descriptor[j].address);
1236 				desc->lower.data = htole32(
1237 					(que->tx.sc_txd_cmd | txd_lower |
1238 					 (u_int16_t)desc_array.descriptor[j].length));
1239 				desc->upper.data = htole32(txd_upper);
1240 
1241 				last = head;
1242 				if (++head == sc->sc_tx_slots)
1243 					head = 0;
1244 
1245 				used++;
1246 			}
1247 		} else {
1248 			desc = &que->tx.sc_tx_desc_ring[head];
1249 
1250 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1251 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1252 			    txd_lower | map->dm_segs[i].ds_len);
1253 			desc->upper.data = htole32(txd_upper);
1254 
1255 			last = head;
1256 			if (++head == sc->sc_tx_slots)
1257 				head = 0;
1258 
1259 			used++;
1260 		}
1261 	}
1262 
1263 #if NVLAN > 0
1264 	/* Find out if we are in VLAN mode */
1265 	if (m->m_flags & M_VLANTAG) {
1266 		/* Set the VLAN id */
1267 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1268 
1269 		/* Tell hardware to add tag */
1270 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1271 	}
1272 #endif
1273 
1274 	/* mark the packet with the mbuf and last desc slot */
1275 	pkt->pkt_m = m;
1276 	pkt->pkt_eop = last;
1277 
1278 	que->tx.sc_tx_desc_head = head;
1279 
1280 	/*
1281 	 * Last Descriptor of Packet
1282 	 * needs End Of Packet (EOP)
1283 	 * and Report Status (RS)
1284 	 */
1285 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1286 
1287 	if (sc->hw.mac_type == em_82547) {
1288 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1289 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1290 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1291 	}
1292 
1293 	return (used);
1294 }
1295 
1296 /*********************************************************************
1297  *
1298  * 82547 workaround to avoid controller hang in half-duplex environment.
1299  * The workaround is to avoid queuing a large packet that would span
1300  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1301  * in this case. We do that only when FIFO is quiescent.
1302  *
1303  **********************************************************************/
1304 void
1305 em_82547_move_tail_locked(struct em_softc *sc)
1306 {
1307 	uint16_t hw_tdt;
1308 	uint16_t sw_tdt;
1309 	struct em_tx_desc *tx_desc;
1310 	uint16_t length = 0;
1311 	boolean_t eop = 0;
1312 	struct em_queue *que = sc->queues; /* single queue chip */
1313 
1314 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1315 	sw_tdt = que->tx.sc_tx_desc_head;
1316 
1317 	while (hw_tdt != sw_tdt) {
1318 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1319 		length += tx_desc->lower.flags.length;
1320 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1321 		if (++hw_tdt == sc->sc_tx_slots)
1322 			hw_tdt = 0;
1323 
1324 		if (eop) {
1325 			if (em_82547_fifo_workaround(sc, length)) {
1326 				sc->tx_fifo_wrk_cnt++;
1327 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1328 				break;
1329 			}
1330 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1331 			em_82547_update_fifo_head(sc, length);
1332 			length = 0;
1333 		}
1334 	}
1335 }
1336 
1337 void
1338 em_82547_move_tail(void *arg)
1339 {
1340 	struct em_softc *sc = arg;
1341 	int s;
1342 
1343 	s = splnet();
1344 	em_82547_move_tail_locked(sc);
1345 	splx(s);
1346 }
1347 
1348 int
1349 em_82547_fifo_workaround(struct em_softc *sc, int len)
1350 {
1351 	int fifo_space, fifo_pkt_len;
1352 
1353 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1354 
1355 	if (sc->link_duplex == HALF_DUPLEX) {
1356 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1357 
1358 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1359 			if (em_82547_tx_fifo_reset(sc))
1360 				return (0);
1361 			else
1362 				return (1);
1363 		}
1364 	}
1365 
1366 	return (0);
1367 }
1368 
1369 void
1370 em_82547_update_fifo_head(struct em_softc *sc, int len)
1371 {
1372 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1373 
1374 	/* tx_fifo_head is always 16 byte aligned */
1375 	sc->tx_fifo_head += fifo_pkt_len;
1376 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1377 		sc->tx_fifo_head -= sc->tx_fifo_size;
1378 }
1379 
1380 int
1381 em_82547_tx_fifo_reset(struct em_softc *sc)
1382 {
1383 	uint32_t tctl;
1384 	struct em_queue *que = sc->queues; /* single queue chip */
1385 
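	/*
	 * Only reset the FIFO pointers while the transmitter is
	 * completely quiescent: the descriptor ring is empty (TDT == TDH),
	 * the FIFO head and tail match, their saved copies match, and the
	 * FIFO packet count is zero.
	 */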
1386 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1387 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1388 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1389 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1390 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1391 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1392 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1393 
1394 		/* Disable TX unit */
1395 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1396 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1397 
1398 		/* Reset FIFO pointers */
1399 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1400 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1401 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1402 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1403 
1404 		/* Re-enable TX unit */
1405 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1406 		E1000_WRITE_FLUSH(&sc->hw);
1407 
1408 		sc->tx_fifo_head = 0;
1409 		sc->tx_fifo_reset_cnt++;
1410 
1411 		return (TRUE);
1412 	} else
1413 		return (FALSE);
1414 }
1415 
1416 void
1417 em_iff(struct em_softc *sc)
1418 {
1419 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1420 	struct arpcom *ac = &sc->sc_ac;
1421 	u_int32_t reg_rctl = 0;
1422 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1423 	struct ether_multi *enm;
1424 	struct ether_multistep step;
1425 	int i = 0;
1426 
1427 	IOCTL_DEBUGOUT("em_iff: begin");
1428 
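	/*
	 * The 82542 rev2.0 requires the receiver to be held in reset
	 * (and MWI turned off) while the receive filter is rewritten;
	 * it is released from reset again once the update is complete.
	 */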
1429 	if (sc->hw.mac_type == em_82542_rev2_0) {
1430 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1431 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1432 			em_pci_clear_mwi(&sc->hw);
1433 		reg_rctl |= E1000_RCTL_RST;
1434 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1435 		msec_delay(5);
1436 	}
1437 
1438 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1439 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1440 	ifp->if_flags &= ~IFF_ALLMULTI;
1441 
1442 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1443 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1444 		ifp->if_flags |= IFF_ALLMULTI;
1445 		reg_rctl |= E1000_RCTL_MPE;
1446 		if (ifp->if_flags & IFF_PROMISC)
1447 			reg_rctl |= E1000_RCTL_UPE;
1448 	} else {
1449 		ETHER_FIRST_MULTI(step, ac, enm);
1450 		while (enm != NULL) {
1451 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1452 			i += ETH_LENGTH_OF_ADDRESS;
1453 
1454 			ETHER_NEXT_MULTI(step, enm);
1455 		}
1456 
1457 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1458 	}
1459 
1460 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1461 
1462 	if (sc->hw.mac_type == em_82542_rev2_0) {
1463 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1464 		reg_rctl &= ~E1000_RCTL_RST;
1465 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1466 		msec_delay(5);
1467 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1468 			em_pci_set_mwi(&sc->hw);
1469 	}
1470 }
1471 
1472 /*********************************************************************
1473  *  Timer routine
1474  *
1475  *  This routine checks for link status and updates statistics.
1476  *
1477  **********************************************************************/
1478 
1479 void
1480 em_local_timer(void *arg)
1481 {
1482 	struct em_softc *sc = arg;
1483 	int s;
1484 
1485 	timeout_add_sec(&sc->timer_handle, 1);
1486 
1487 	s = splnet();
1488 	em_smartspeed(sc);
1489 	splx(s);
1490 
1491 #if NKSTAT > 0
1492 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1493 		em_kstat_read(sc->kstat);
1494 		mtx_leave(&sc->kstat_mtx);
1495 	}
1496 #endif
1497 }
1498 
1499 void
1500 em_update_link_status(struct em_softc *sc)
1501 {
1502 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1503 	u_char link_state;
1504 
1505 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1506 		if (sc->link_active == 0) {
1507 			em_get_speed_and_duplex(&sc->hw,
1508 						&sc->link_speed,
1509 						&sc->link_duplex);
1510 			/* Check if we may set SPEED_MODE bit on PCI-E */
1511 			if ((sc->link_speed == SPEED_1000) &&
1512 			    ((sc->hw.mac_type == em_82571) ||
1513 			    (sc->hw.mac_type == em_82572) ||
1514 			    (sc->hw.mac_type == em_82575) ||
1515 			    (sc->hw.mac_type == em_82576) ||
1516 			    (sc->hw.mac_type == em_82580))) {
1517 				int tarc0;
1518 
1519 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1520 				tarc0 |= SPEED_MODE_BIT;
1521 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1522 			}
1523 			sc->link_active = 1;
1524 			sc->smartspeed = 0;
1525 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1526 		}
1527 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1528 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1529 	} else {
1530 		if (sc->link_active == 1) {
1531 			ifp->if_baudrate = sc->link_speed = 0;
1532 			sc->link_duplex = 0;
1533 			sc->link_active = 0;
1534 		}
1535 		link_state = LINK_STATE_DOWN;
1536 	}
1537 	if (ifp->if_link_state != link_state) {
1538 		ifp->if_link_state = link_state;
1539 		if_link_state_change(ifp);
1540 	}
1541 }
1542 
1543 /*********************************************************************
1544  *
1545  *  This routine disables all traffic on the adapter by issuing a
1546  *  global reset on the MAC and deallocates TX/RX buffers.
1547  *
1548  **********************************************************************/
1549 
1550 void
1551 em_stop(void *arg, int softonly)
1552 {
1553 	struct em_softc *sc = arg;
1554 	struct em_queue *que = sc->queues; /* Use only first queue. */
1555 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1556 
1557 	/* Tell the stack that the interface is no longer active */
1558 	ifp->if_flags &= ~IFF_RUNNING;
1559 
1560 	INIT_DEBUGOUT("em_stop: begin");
1561 
1562 	timeout_del(&que->rx_refill);
1563 	timeout_del(&sc->timer_handle);
1564 	timeout_del(&sc->tx_fifo_timer_handle);
1565 
1566 	if (!softonly)
1567 		em_disable_intr(sc);
1568 	if (sc->hw.mac_type >= em_pch_spt)
1569 		em_flush_desc_rings(sc);
1570 	if (!softonly)
1571 		em_reset_hw(&sc->hw);
1572 
1573 	intr_barrier(sc->sc_intrhand);
1574 	ifq_barrier(&ifp->if_snd);
1575 
1576 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1577 
1578 	ifq_clr_oactive(&ifp->if_snd);
1579 	ifp->if_timer = 0;
1580 
1581 	em_free_transmit_structures(sc);
1582 	em_free_receive_structures(sc);
1583 }
1584 
1585 /*********************************************************************
1586  *
1587  *  Determine hardware revision.
1588  *
1589  **********************************************************************/
1590 void
1591 em_identify_hardware(struct em_softc *sc)
1592 {
1593 	u_int32_t reg;
1594 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1595 
1596 	/* Make sure our PCI config space has the necessary stuff set */
1597 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1598 					    PCI_COMMAND_STATUS_REG);
1599 
1600 	/* Save off the information about this board */
1601 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1602 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1603 
1604 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1605 	sc->hw.revision_id = PCI_REVISION(reg);
1606 
1607 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1608 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1609 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1610 
1611 	/* Identify the MAC */
1612 	if (em_set_mac_type(&sc->hw))
1613 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1614 
1615 	if (sc->hw.mac_type == em_pchlan)
1616 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1617 
1618 	if (sc->hw.mac_type == em_82541 ||
1619 	    sc->hw.mac_type == em_82541_rev_2 ||
1620 	    sc->hw.mac_type == em_82547 ||
1621 	    sc->hw.mac_type == em_82547_rev_2)
1622 		sc->hw.phy_init_script = TRUE;
1623 }
1624 
1625 void
1626 em_legacy_irq_quirk_spt(struct em_softc *sc)
1627 {
1628 	uint32_t	reg;
1629 
1630 	/* Legacy interrupt: SPT needs a quirk. */
1631 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp)
1632 		return;
1633 	if (sc->legacy_irq == 0)
1634 		return;
1635 
1636 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1637 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1638 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1639 
1640 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1641 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1642 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1643 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1644 }
1645 
1646 int
1647 em_allocate_pci_resources(struct em_softc *sc)
1648 {
1649 	int		val, rid;
1650 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1651 	struct em_queue	       *que = NULL;
1652 
1653 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1654 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1655 		printf(": mmba is not mem space\n");
1656 		return (ENXIO);
1657 	}
1658 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1659 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1660 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1661 		printf(": cannot find mem space\n");
1662 		return (ENXIO);
1663 	}
1664 
1665 	switch (sc->hw.mac_type) {
1666 	case em_82544:
1667 	case em_82540:
1668 	case em_82545:
1669 	case em_82546:
1670 	case em_82541:
1671 	case em_82541_rev_2:
1672 		/* Figure out where our I/O BAR is. */
1673 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1674 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1675 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1676 				sc->io_rid = rid;
1677 				break;
1678 			}
1679 			rid += 4;
1680 			if (PCI_MAPREG_MEM_TYPE(val) ==
1681 			    PCI_MAPREG_MEM_TYPE_64BIT)
1682 				rid += 4;	/* skip high bits, too */
1683 		}
1684 
1685 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1686 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1687 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1688 			printf(": cannot find i/o space\n");
1689 			return (ENXIO);
1690 		}
1691 
1692 		sc->hw.io_base = 0;
1693 		break;
1694 	default:
1695 		break;
1696 	}
1697 
1698 	sc->osdep.em_flashoffset = 0;
1699 	/* for ICH8 and family we need to find the flash memory */
1700 	if (sc->hw.mac_type >= em_pch_spt) {
1701 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1702 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1703 		sc->osdep.em_flashbase = 0;
1704 		sc->osdep.em_flashsize = 0;
1705 		sc->osdep.em_flashoffset = 0xe000;
1706 	} else if (IS_ICH8(sc->hw.mac_type)) {
1707 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1708 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1709 			printf(": flash is not mem space\n");
1710 			return (ENXIO);
1711 		}
1712 
1713 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1714 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1715 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1716 			printf(": cannot find mem space\n");
1717 			return (ENXIO);
1718 		}
1719 	}
1720 
1721 	sc->osdep.dev = (struct device *)sc;
1722 	sc->hw.back = &sc->osdep;
1723 
1724 	/* Only one queue for the moment. */
1725 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1726 	if (que == NULL) {
1727 		printf(": unable to allocate queue memory\n");
1728 		return (ENOMEM);
1729 	}
1730 	que->me = 0;
1731 	que->sc = sc;
1732 	timeout_set(&que->rx_refill, em_rxrefill, que);
1733 
1734 	sc->queues = que;
1735 	sc->num_queues = 1;
1736 	sc->msix = 0;
1737 	sc->legacy_irq = 0;
1738 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1739 		return (ENXIO);
1740 
1741 	/*
1742 	 * The ICP_xxxx device has multiple, duplicate register sets for
1743 	 * use when it is being used as a network processor. Disable those
1744 	 * registers here, as they are not necessary in this context and
1745 	 * can confuse the system.
1746 	 */
1747 	if (sc->hw.mac_type == em_icp_xxxx) {
1748 		int offset;
1749 		pcireg_t val;
1750 
1751 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1752 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1753 			return (0);
1754 		}
1755 		offset += PCI_ST_SMIA_OFFSET;
1756 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1757 		    offset, 0x06);
1758 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1759 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1760 	}
1761 	return (0);
1762 }
1763 
1764 void
1765 em_free_pci_resources(struct em_softc *sc)
1766 {
1767 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1768 	pci_chipset_tag_t	pc = pa->pa_pc;
1769 	struct em_queue	       *que = NULL;
1770 	if (sc->sc_intrhand)
1771 		pci_intr_disestablish(pc, sc->sc_intrhand);
1772 	sc->sc_intrhand = 0;
1773 
1774 	if (sc->osdep.em_flashbase)
1775 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1776 				sc->osdep.em_flashsize);
1777 	sc->osdep.em_flashbase = 0;
1778 
1779 	if (sc->osdep.em_iobase)
1780 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1781 				sc->osdep.em_iosize);
1782 	sc->osdep.em_iobase = 0;
1783 
1784 	if (sc->osdep.em_membase)
1785 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1786 				sc->osdep.em_memsize);
1787 	sc->osdep.em_membase = 0;
1788 
1789 	FOREACH_QUEUE(sc, que) {
1790 		if (que->rx.sc_rx_desc_ring != NULL) {
1791 			que->rx.sc_rx_desc_ring = NULL;
1792 			em_dma_free(sc, &que->rx.sc_rx_dma);
1793 		}
1794 		if (que->tx.sc_tx_desc_ring != NULL) {
1795 			que->tx.sc_tx_desc_ring = NULL;
1796 			em_dma_free(sc, &que->tx.sc_tx_dma);
1797 		}
1798 		if (que->tag)
1799 			pci_intr_disestablish(pc, que->tag);
1800 		que->tag = NULL;
1801 		que->eims = 0;
1802 		que->me = 0;
1803 		que->sc = NULL;
1804 	}
1805 	sc->legacy_irq = 0;
1806 	sc->msix_linkvec = 0;
1807 	sc->msix_queuesmask = 0;
1808 	if (sc->queues)
1809 		free(sc->queues, M_DEVBUF,
1810 		    sc->num_queues * sizeof(struct em_queue));
1811 	sc->num_queues = 0;
1812 	sc->queues = NULL;
1813 }
1814 
1815 /*********************************************************************
1816  *
1817  *  Initialize the hardware to a configuration as specified by the
1818  *  em_softc structure. The controller is reset, the EEPROM is
1819  *  verified, the MAC address is set, then the shared initialization
1820  *  routines are called.
1821  *
1822  **********************************************************************/
1823 int
1824 em_hardware_init(struct em_softc *sc)
1825 {
1826 	uint32_t ret_val;
1827 	u_int16_t rx_buffer_size;
1828 
1829 	INIT_DEBUGOUT("em_hardware_init: begin");
1830 	if (sc->hw.mac_type >= em_pch_spt)
1831 		em_flush_desc_rings(sc);
1832 	/* Issue a global reset */
1833 	em_reset_hw(&sc->hw);
1834 
1835 	/* When hardware is reset, fifo_head is also reset */
1836 	sc->tx_fifo_head = 0;
1837 
1838 	/* Make sure we have a good EEPROM before we read from it */
1839 	if (em_get_flash_presence_i210(&sc->hw) &&
1840 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1841 		/*
1842 		 * Some PCIe parts fail the first check due to
1843 		 * the link being in sleep state; call it again, and
1844 		 * if it fails a second time it's a real issue.
1845 		 */
1846 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1847 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1848 			       DEVNAME(sc));
1849 			return (EIO);
1850 		}
1851 	}
1852 
1853 	if (em_get_flash_presence_i210(&sc->hw) &&
1854 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1855 		printf("%s: EEPROM read error while reading part number\n",
1856 		       DEVNAME(sc));
1857 		return (EIO);
1858 	}
1859 
1860 	/* Set up smart power down as default off on newer adapters */
1861 	if (!em_smart_pwr_down &&
1862 	     (sc->hw.mac_type == em_82571 ||
1863 	      sc->hw.mac_type == em_82572 ||
1864 	      sc->hw.mac_type == em_82575 ||
1865 	      sc->hw.mac_type == em_82576 ||
1866 	      sc->hw.mac_type == em_82580 ||
1867 	      sc->hw.mac_type == em_i210 ||
1868 	      sc->hw.mac_type == em_i350 )) {
1869 		uint16_t phy_tmp = 0;
1870 
1871 		/* Speed up time to link by disabling smart power down */
1872 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1873 		phy_tmp &= ~IGP02E1000_PM_SPD;
1874 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1875 	}
1876 
1877 	em_legacy_irq_quirk_spt(sc);
1878 
1879 	/*
1880 	 * These parameters control the automatic generation (Tx) and
1881 	 * response (Rx) to Ethernet PAUSE frames.
1882 	 * - High water mark should allow for at least two frames to be
1883 	 *   received after sending an XOFF.
1884 	 * - Low water mark works best when it is very near the high water mark.
1885 	 *   This allows the receiver to restart by sending XON when it has
1886 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1887 	 *   restart after one full frame is pulled from the buffer.  There
1888 	 *   could be several smaller frames in the buffer and if so they will
1889 	 *   not trigger the XON until their total number reduces the buffer
1890 	 *   by 1500.
1891 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1892 	 */
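	/*
	 * A worked example of the water mark arithmetic below (illustrative
	 * numbers only; the real values depend on the adapter's PBA
	 * programming and max_frame_size): if PBA reports 48 KB of RX packet
	 * buffer, rx_buffer_size = 48 << 10 = 49152.  With a 1518-byte max
	 * frame rounded up to 1024-byte granularity (2048),
	 * fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */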
1893 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1894 
1895 	sc->hw.fc_high_water = rx_buffer_size -
1896 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1897 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1898 	if (sc->hw.mac_type == em_80003es2lan)
1899 		sc->hw.fc_pause_time = 0xFFFF;
1900 	else
1901 		sc->hw.fc_pause_time = 1000;
1902 	sc->hw.fc_send_xon = TRUE;
1903 	sc->hw.fc = E1000_FC_FULL;
1904 
1905 	em_disable_aspm(sc);
1906 
1907 	if ((ret_val = em_init_hw(sc)) != 0) {
1908 		if (ret_val == E1000_DEFER_INIT) {
1909 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1910 			return (EAGAIN);
1911 		}
1912 		printf("\n%s: Hardware Initialization Failed: %d\n",
1913 		       DEVNAME(sc), ret_val);
1914 		return (EIO);
1915 	}
1916 
1917 	em_check_for_link(&sc->hw);
1918 
1919 	return (0);
1920 }
1921 
1922 /*********************************************************************
1923  *
1924  *  Setup networking device structure and register an interface.
1925  *
1926  **********************************************************************/
1927 void
1928 em_setup_interface(struct em_softc *sc)
1929 {
1930 	struct ifnet   *ifp;
1931 	uint64_t fiber_type = IFM_1000_SX;
1932 
1933 	INIT_DEBUGOUT("em_setup_interface: begin");
1934 
1935 	ifp = &sc->sc_ac.ac_if;
1936 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1937 	ifp->if_softc = sc;
1938 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1939 	ifp->if_xflags = IFXF_MPSAFE;
1940 	ifp->if_ioctl = em_ioctl;
1941 	ifp->if_qstart = em_start;
1942 	ifp->if_watchdog = em_watchdog;
1943 	ifp->if_hardmtu =
1944 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1945 	ifq_set_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1946 
1947 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1948 
1949 #if NVLAN > 0
1950 	if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
1951 	    sc->hw.mac_type != em_82576 &&
1952 	    sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
1953 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1954 #endif
1955 
1956 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1957 	    sc->hw.mac_type != em_82576 &&
1958 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1959 	    sc->hw.mac_type != em_i350)
1960 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1961 
1962 	/*
1963 	 * Specify the media types supported by this adapter and register
1964 	 * callbacks to update media and link information
1965 	 */
1966 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1967 		     em_media_status);
1968 	if (sc->hw.media_type == em_media_type_fiber ||
1969 	    sc->hw.media_type == em_media_type_internal_serdes) {
1970 		if (sc->hw.mac_type == em_82545)
1971 			fiber_type = IFM_1000_LX;
1972 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1973 			    0, NULL);
1974 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1975 			    0, NULL);
1976 	} else {
1977 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1978 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1979 			    0, NULL);
1980 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
1981 			    0, NULL);
1982 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1983 			    0, NULL);
1984 		if (sc->hw.phy_type != em_phy_ife) {
1985 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1986 				    0, NULL);
1987 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1988 		}
1989 	}
1990 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1991 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1992 
1993 	if_attach(ifp);
1994 	ether_ifattach(ifp);
1995 	em_enable_intr(sc);
1996 }
1997 
1998 int
1999 em_detach(struct device *self, int flags)
2000 {
2001 	struct em_softc *sc = (struct em_softc *)self;
2002 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2003 	struct pci_attach_args *pa = &sc->osdep.em_pa;
2004 	pci_chipset_tag_t	pc = pa->pa_pc;
2005 
2006 	if (sc->sc_intrhand)
2007 		pci_intr_disestablish(pc, sc->sc_intrhand);
2008 	sc->sc_intrhand = 0;
2009 
2010 	em_stop(sc, 1);
2011 
2012 	em_free_pci_resources(sc);
2013 
2014 	ether_ifdetach(ifp);
2015 	if_detach(ifp);
2016 
2017 	return (0);
2018 }
2019 
2020 int
2021 em_activate(struct device *self, int act)
2022 {
2023 	struct em_softc *sc = (struct em_softc *)self;
2024 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2025 	int rv = 0;
2026 
2027 	switch (act) {
2028 	case DVACT_SUSPEND:
2029 		if (ifp->if_flags & IFF_RUNNING)
2030 			em_stop(sc, 0);
2031 		/* We have no children at the moment, but we will soon */
2032 		rv = config_activate_children(self, act);
2033 		break;
2034 	case DVACT_RESUME:
2035 		if (ifp->if_flags & IFF_UP)
2036 			em_init(sc);
2037 		break;
2038 	default:
2039 		rv = config_activate_children(self, act);
2040 		break;
2041 	}
2042 	return (rv);
2043 }
2044 
2045 /*********************************************************************
2046  *
2047  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2048  *
2049  **********************************************************************/
2050 void
2051 em_smartspeed(struct em_softc *sc)
2052 {
2053 	uint16_t phy_tmp;
2054 
2055 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2056 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2057 		return;
2058 
2059 	if (sc->smartspeed == 0) {
2060 		/* If Master/Slave config fault is asserted twice,
2061 		 * we assume back-to-back faults */
2062 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2063 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2064 			return;
2065 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2066 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2067 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2068 					&phy_tmp);
2069 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2070 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2071 				em_write_phy_reg(&sc->hw,
2072 						    PHY_1000T_CTRL, phy_tmp);
2073 				sc->smartspeed++;
2074 				if (sc->hw.autoneg &&
2075 				    !em_phy_setup_autoneg(&sc->hw) &&
2076 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2077 						       &phy_tmp)) {
2078 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2079 						    MII_CR_RESTART_AUTO_NEG);
2080 					em_write_phy_reg(&sc->hw,
2081 							 PHY_CTRL, phy_tmp);
2082 				}
2083 			}
2084 		}
2085 		return;
2086 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2087 		/* If still no link, perhaps using 2/3 pair cable */
2088 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2089 		phy_tmp |= CR_1000T_MS_ENABLE;
2090 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2091 		if (sc->hw.autoneg &&
2092 		    !em_phy_setup_autoneg(&sc->hw) &&
2093 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2094 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2095 				    MII_CR_RESTART_AUTO_NEG);
2096 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2097 		}
2098 	}
2099 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2100 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2101 		sc->smartspeed = 0;
2102 }
2103 
2104 /*
2105  * Manage DMA'able memory.
2106  */
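/*
 * em_dma_malloc() runs the usual bus_dma(9) setup sequence: create a map,
 * allocate DMA-safe memory, map it into KVA, then load the map with that
 * memory; each failure path unwinds the steps already taken.  em_dma_free()
 * tears the same state down in reverse order (unload, unmap, free, destroy).
 */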
2107 int
2108 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2109 {
2110 	int r;
2111 
2112 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2113 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2114 	if (r != 0)
2115 		return (r);
2116 
2117 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2118 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2119 	if (r != 0)
2120 		goto destroy;
2121 
2122 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2123 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2124 	if (r != 0)
2125 		goto free;
2126 
2127 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2128 	    NULL, BUS_DMA_WAITOK);
2129 	if (r != 0)
2130 		goto unmap;
2131 
2132 	dma->dma_size = size;
2133 	return (0);
2134 
2135 unmap:
2136 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2137 free:
2138 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2139 destroy:
2140 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2141 
2142 	return (r);
2143 }
2144 
2145 void
2146 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2147 {
2148 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2149 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2150 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2151 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2152 }
2153 
2154 /*********************************************************************
2155  *
2156  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2157  *  the information needed to transmit a packet on the wire.
2158  *
2159  **********************************************************************/
2160 int
2161 em_allocate_transmit_structures(struct em_softc *sc)
2162 {
2163 	struct em_queue *que;
2164 
2165 	FOREACH_QUEUE(sc, que) {
2166 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2167 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2168 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2169 
2170 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2171 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2172 		if (que->tx.sc_tx_pkts_ring == NULL) {
2173 			printf("%s: Unable to allocate tx_buffer memory\n",
2174 			    DEVNAME(sc));
2175 			return (ENOMEM);
2176 		}
2177 	}
2178 
2179 	return (0);
2180 }
2181 
2182 /*********************************************************************
2183  *
2184  *  Allocate and initialize transmit structures.
2185  *
2186  **********************************************************************/
2187 int
2188 em_setup_transmit_structures(struct em_softc *sc)
2189 {
2190 	struct em_queue *que;
2191 	struct em_packet *pkt;
2192 	int error, i;
2193 
2194 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2195 		goto fail;
2196 
2197 	FOREACH_QUEUE(sc, que) {
2198 		bzero((void *) que->tx.sc_tx_desc_ring,
2199 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2200 
2201 		for (i = 0; i < sc->sc_tx_slots; i++) {
2202 			pkt = &que->tx.sc_tx_pkts_ring[i];
2203 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2204 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2205 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2206 			if (error != 0) {
2207 				printf("%s: Unable to create TX DMA map\n",
2208 				    DEVNAME(sc));
2209 				goto fail;
2210 			}
2211 		}
2212 
2213 		que->tx.sc_tx_desc_head = 0;
2214 		que->tx.sc_tx_desc_tail = 0;
2215 
2216 		/* Set checksum context */
2217 		que->tx.active_checksum_context = OFFLOAD_NONE;
2218 	}
2219 
2220 	return (0);
2221 
2222 fail:
2223 	em_free_transmit_structures(sc);
2224 	return (error);
2225 }
2226 
2227 /*********************************************************************
2228  *
2229  *  Enable transmit unit.
2230  *
2231  **********************************************************************/
2232 void
2233 em_initialize_transmit_unit(struct em_softc *sc)
2234 {
2235 	u_int32_t	reg_tctl, reg_tipg = 0;
2236 	u_int64_t	bus_addr;
2237 	struct em_queue *que;
2238 
2239 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2240 
2241 	FOREACH_QUEUE(sc, que) {
2242 		/* Setup the Base and Length of the Tx Descriptor Ring */
2243 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2244 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2245 		    sc->sc_tx_slots *
2246 		    sizeof(struct em_tx_desc));
2247 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2248 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2249 
2250 		/* Setup the HW Tx Head and Tail descriptor pointers */
2251 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2252 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2253 
2254 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2255 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2256 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2257 
2258 		/* Set the default values for the Tx Inter Packet Gap timer */
2259 		switch (sc->hw.mac_type) {
2260 		case em_82542_rev2_0:
2261 		case em_82542_rev2_1:
2262 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2263 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2264 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2265 			break;
2266 		case em_80003es2lan:
2267 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2268 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2269 			break;
2270 		default:
2271 			if (sc->hw.media_type == em_media_type_fiber ||
2272 			    sc->hw.media_type == em_media_type_internal_serdes)
2273 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2274 			else
2275 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2276 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2277 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2278 		}
2279 
2280 
2281 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2282 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2283 		if (sc->hw.mac_type >= em_82540)
2284 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2285 
2286 		/* Setup Transmit Descriptor Base Settings */
2287 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2288 
2289 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2290 		    sc->hw.mac_type == em_82576 ||
2291 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2292 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2293 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2294 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2295 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2296 		} else if (sc->tx_int_delay > 0)
2297 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2298 	}
2299 
2300 	/* Program the Transmit Control Register */
2301 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2302 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2303 	if (sc->hw.mac_type >= em_82571)
2304 		reg_tctl |= E1000_TCTL_MULR;
2305 	if (sc->link_duplex == FULL_DUPLEX)
2306 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2307 	else
2308 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2309 	/* This write will effectively turn on the transmit unit */
2310 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2311 
2312 	/* SPT Si errata workaround to avoid data corruption */
2313 
2314 	if (sc->hw.mac_type == em_pch_spt) {
2315 		uint32_t	reg_val;
2316 
2317 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2318 		reg_val |= E1000_RCTL_RDMTS_HEX;
2319 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2320 
2321 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2322 		/* i218-i219 Specification Update 1.5.4.5 */
2323 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2324 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2325 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2326 	}
2327 }
2328 
2329 /*********************************************************************
2330  *
2331  *  Free all transmit related data structures.
2332  *
2333  **********************************************************************/
2334 void
2335 em_free_transmit_structures(struct em_softc *sc)
2336 {
2337 	struct em_queue *que;
2338 	struct em_packet *pkt;
2339 	int i;
2340 
2341 	INIT_DEBUGOUT("free_transmit_structures: begin");
2342 
2343 	FOREACH_QUEUE(sc, que) {
2344 		if (que->tx.sc_tx_pkts_ring != NULL) {
2345 			for (i = 0; i < sc->sc_tx_slots; i++) {
2346 				pkt = &que->tx.sc_tx_pkts_ring[i];
2347 
2348 				if (pkt->pkt_m != NULL) {
2349 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2350 					    0, pkt->pkt_map->dm_mapsize,
2351 					    BUS_DMASYNC_POSTWRITE);
2352 					bus_dmamap_unload(sc->sc_dmat,
2353 					    pkt->pkt_map);
2354 
2355 					m_freem(pkt->pkt_m);
2356 					pkt->pkt_m = NULL;
2357 				}
2358 
2359 				if (pkt->pkt_map != NULL) {
2360 					bus_dmamap_destroy(sc->sc_dmat,
2361 					    pkt->pkt_map);
2362 					pkt->pkt_map = NULL;
2363 				}
2364 			}
2365 
2366 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2367 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2368 			que->tx.sc_tx_pkts_ring = NULL;
2369 		}
2370 
2371 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2372 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2373 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2374 	}
2375 }
2376 
2377 /*********************************************************************
2378  *
2379  *  The offload context needs to be set when we transfer the first
2380  *  packet of a particular protocol (TCP/UDP). We change the
2381  *  context only if the protocol type changes.
2382  *
2383  **********************************************************************/
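/*
 * For example, a run of TCP packets reuses the TCP/IP context programmed
 * for the first packet and this function returns 0 (no extra descriptor);
 * only when the protocol changes (e.g. a UDP packet follows) is a new
 * context descriptor written and 1 returned, so the caller accounts for
 * the extra ring slot.
 */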
2384 u_int
2385 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2386     u_int32_t *txd_upper, u_int32_t *txd_lower)
2387 {
2388 	struct em_context_desc *TXD;
2389 
2390 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2391 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2392 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2393 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2394 			return (0);
2395 		else
2396 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2397 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2398 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2399 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2400 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2401 			return (0);
2402 		else
2403 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2404 	} else {
2405 		*txd_upper = 0;
2406 		*txd_lower = 0;
2407 		return (0);
2408 	}
2409 
2410 	/* If we reach this point, the checksum offload context
2411 	 * needs to be reset.
2412 	 */
2413 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2414 
2415 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2416 	TXD->lower_setup.ip_fields.ipcso =
2417 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2418 	TXD->lower_setup.ip_fields.ipcse =
2419 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2420 
2421 	TXD->upper_setup.tcp_fields.tucss =
2422 	    ETHER_HDR_LEN + sizeof(struct ip);
2423 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2424 
2425 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2426 		TXD->upper_setup.tcp_fields.tucso =
2427 		    ETHER_HDR_LEN + sizeof(struct ip) +
2428 		    offsetof(struct tcphdr, th_sum);
2429 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2430 		TXD->upper_setup.tcp_fields.tucso =
2431 		    ETHER_HDR_LEN + sizeof(struct ip) +
2432 		    offsetof(struct udphdr, uh_sum);
2433 	}
2434 
2435 	TXD->tcp_seg_setup.data = htole32(0);
2436 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2437 
2438 	return (1);
2439 }
2440 
2441 /**********************************************************************
2442  *
2443  *  Examine each tx_buffer in the used queue. If the hardware is done
2444  *  processing the packet then free associated resources. The
2445  *  tx_buffer is put back on the free queue.
2446  *
2447  **********************************************************************/
2448 void
2449 em_txeof(struct em_queue *que)
2450 {
2451 	struct em_softc *sc = que->sc;
2452 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2453 	struct em_packet *pkt;
2454 	struct em_tx_desc *desc;
2455 	u_int head, tail;
2456 	u_int free = 0;
2457 
2458 	head = que->tx.sc_tx_desc_head;
2459 	tail = que->tx.sc_tx_desc_tail;
2460 
2461 	if (head == tail)
2462 		return;
2463 
2464 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2465 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2466 	    BUS_DMASYNC_POSTREAD);
2467 
2468 	do {
2469 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2470 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2471 
2472 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2473 			break;
2474 
2475 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2476 		    0, pkt->pkt_map->dm_mapsize,
2477 		    BUS_DMASYNC_POSTWRITE);
2478 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2479 
2480 		KASSERT(pkt->pkt_m != NULL);
2481 
2482 		m_freem(pkt->pkt_m);
2483 		pkt->pkt_m = NULL;
2484 
2485 		tail = pkt->pkt_eop;
2486 
2487 		if (++tail == sc->sc_tx_slots)
2488 			tail = 0;
2489 
2490 		free++;
2491 	} while (tail != head);
2492 
2493 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2494 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2495 	    BUS_DMASYNC_PREREAD);
2496 
2497 	if (free == 0)
2498 		return;
2499 
2500 	que->tx.sc_tx_desc_tail = tail;
2501 
2502 	if (ifq_is_oactive(&ifp->if_snd))
2503 		ifq_restart(&ifp->if_snd);
2504 	else if (tail == head)
2505 		ifp->if_timer = 0;
2506 }
2507 
2508 /*********************************************************************
2509  *
2510  *  Get a buffer from system mbuf buffer pool.
2511  *
2512  **********************************************************************/
2513 int
2514 em_get_buf(struct em_queue *que, int i)
2515 {
2516 	struct em_softc *sc = que->sc;
2517 	struct mbuf    *m;
2518 	struct em_packet *pkt;
2519 	struct em_rx_desc *desc;
2520 	int error;
2521 
2522 	pkt = &que->rx.sc_rx_pkts_ring[i];
2523 	desc = &que->rx.sc_rx_desc_ring[i];
2524 
2525 	KASSERT(pkt->pkt_m == NULL);
2526 
2527 	m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2528 	if (m == NULL) {
2529 		sc->mbuf_cluster_failed++;
2530 		return (ENOBUFS);
2531 	}
2532 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2533 	m_adj(m, ETHER_ALIGN);
2534 
2535 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2536 	    m, BUS_DMA_NOWAIT);
2537 	if (error) {
2538 		m_freem(m);
2539 		return (error);
2540 	}
2541 
2542 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2543 	    0, pkt->pkt_map->dm_mapsize,
2544 	    BUS_DMASYNC_PREREAD);
2545 	pkt->pkt_m = m;
2546 
2547 	memset(desc, 0, sizeof(*desc));
2548 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2549 
2550 	return (0);
2551 }
2552 
2553 /*********************************************************************
2554  *
2555  *  Allocate memory for rx_buffer structures. Since we use one
2556  *  rx_buffer per received packet, the maximum number of rx_buffers
2557  *  that we'll need is equal to the number of receive descriptors
2558  *  that we've allocated.
2559  *
2560  **********************************************************************/
2561 int
2562 em_allocate_receive_structures(struct em_softc *sc)
2563 {
2564 	struct em_queue *que;
2565 	struct em_packet *pkt;
2566 	int i;
2567 	int error;
2568 
2569 	FOREACH_QUEUE(sc, que) {
2570 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2571 		    sizeof(*que->rx.sc_rx_pkts_ring),
2572 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2573 		if (que->rx.sc_rx_pkts_ring == NULL) {
2574 			printf("%s: Unable to allocate rx_buffer memory\n",
2575 			    DEVNAME(sc));
2576 			return (ENOMEM);
2577 		}
2578 
2579 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2580 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2581 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2582 
2583 		for (i = 0; i < sc->sc_rx_slots; i++) {
2584 			pkt = &que->rx.sc_rx_pkts_ring[i];
2585 
2586 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2587 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2588 			if (error != 0) {
2589 				printf("%s: em_allocate_receive_structures: "
2590 				    "bus_dmamap_create failed; error %u\n",
2591 				    DEVNAME(sc), error);
2592 				goto fail;
2593 			}
2594 
2595 			pkt->pkt_m = NULL;
2596 		}
2597 	}
2598 
2599 	return (0);
2600 
2601 fail:
2602 	em_free_receive_structures(sc);
2603 	return (error);
2604 }
2605 
2606 /*********************************************************************
2607  *
2608  *  Allocate and initialize receive structures.
2609  *
2610  **********************************************************************/
2611 int
2612 em_setup_receive_structures(struct em_softc *sc)
2613 {
2614 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2615 	struct em_queue *que;
2616 	u_int lwm;
2617 
2618 	if (em_allocate_receive_structures(sc))
2619 		return (ENOMEM);
2620 
2621 	FOREACH_QUEUE(sc, que) {
2622 		memset(que->rx.sc_rx_desc_ring, 0,
2623 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2624 
2625 		/* Setup our descriptor pointers */
2626 		que->rx.sc_rx_desc_tail = 0;
2627 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2628 
2629 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2630 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2631 
2632 		if (em_rxfill(que) == 0) {
2633 			printf("%s: unable to fill any rx descriptors\n",
2634 			    DEVNAME(sc));
2635 			return (ENOMEM);
2636 		}
2637 	}
2638 
2639 	return (0);
2640 }
2641 
2642 /*********************************************************************
2643  *
2644  *  Enable receive unit.
2645  *
2646  **********************************************************************/
2647 void
2648 em_initialize_receive_unit(struct em_softc *sc)
2649 {
2650 	struct em_queue *que;
2651 	u_int32_t	reg_rctl;
2652 	u_int32_t	reg_rxcsum;
2653 	u_int32_t	reg_srrctl;
2654 	u_int64_t	bus_addr;
2655 
2656 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2657 
2658 	/* Make sure receives are disabled while setting up the descriptor ring */
2659 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2660 
2661 	/* Set the Receive Delay Timer Register */
2662 	E1000_WRITE_REG(&sc->hw, RDTR,
2663 			sc->rx_int_delay | E1000_RDT_FPDB);
2664 
2665 	if (sc->hw.mac_type >= em_82540) {
2666 		if (sc->rx_int_delay)
2667 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2668 
2669 		/* Set the interrupt throttling rate.  Value is calculated
2670 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
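		/*
		 * For example, assuming MAX_INTS_PER_SEC is 8000 (as defined
		 * in if_em.h), DEFAULT_ITR = 1/(8000 * 256ns) ~= 488 units of
		 * 256ns each, i.e. at most one interrupt every ~125us.
		 */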
2671 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2672 	}
2673 
2674 	/* Setup the Receive Control Register */
2675 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2676 	    E1000_RCTL_RDMTS_HALF |
2677 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2678 
2679 	if (sc->hw.tbi_compatibility_on == TRUE)
2680 		reg_rctl |= E1000_RCTL_SBP;
2681 
2682 	/*
2683 	 * The i350 has a bug where it always strips the CRC whether
2684 	 * asked to or not.  So ask for stripped CRC here and
2685 	 * cope in em_rxeof().
2686 	 */
2687 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2688 		reg_rctl |= E1000_RCTL_SECRC;
2689 
2690 	switch (sc->sc_rx_buffer_len) {
2691 	default:
2692 	case EM_RXBUFFER_2048:
2693 		reg_rctl |= E1000_RCTL_SZ_2048;
2694 		break;
2695 	case EM_RXBUFFER_4096:
2696 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2697 		break;
2698 	case EM_RXBUFFER_8192:
2699 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2700 		break;
2701 	case EM_RXBUFFER_16384:
2702 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2703 		break;
2704 	}
2705 
2706 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2707 		reg_rctl |= E1000_RCTL_LPE;
2708 
2709 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2710 	if (sc->hw.mac_type >= em_82543) {
2711 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2712 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2713 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2714 	}
2715 
2716 	/*
2717 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573,
2718 	 * long latencies are observed, e.g. on the Lenovo X60.
2719 	 */
2720 	if (sc->hw.mac_type == em_82573)
2721 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2722 
2723 	FOREACH_QUEUE(sc, que) {
2724 		if (sc->num_queues > 1) {
2725 			/*
2726 			 * Disable Drop Enable for every queue, default has
2727 			 * it enabled for queues > 0
2728 			 */
2729 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2730 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2731 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2732 		}
2733 
2734 		/* Setup the Base and Length of the Rx Descriptor Ring */
2735 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2736 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2737 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2738 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2739 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2740 
2741 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2742 		    sc->hw.mac_type == em_82576 ||
2743 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2744 			/* 82575/6 need to enable the RX queue */
2745 			uint32_t reg;
2746 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2747 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2748 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2749 		}
2750 	}
2751 
2752 	/* Enable Receives */
2753 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2754 
2755 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2756 	FOREACH_QUEUE(sc, que) {
2757 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2758 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2759 	}
2760 }
2761 
2762 /*********************************************************************
2763  *
2764  *  Free receive related data structures.
2765  *
2766  **********************************************************************/
2767 void
2768 em_free_receive_structures(struct em_softc *sc)
2769 {
2770 	struct em_queue *que;
2771 	struct em_packet *pkt;
2772 	int i;
2773 
2774 	INIT_DEBUGOUT("free_receive_structures: begin");
2775 
2776 	FOREACH_QUEUE(sc, que) {
2777 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2778 
2779 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2780 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2781 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2782 
2783 		if (que->rx.sc_rx_pkts_ring != NULL) {
2784 			for (i = 0; i < sc->sc_rx_slots; i++) {
2785 				pkt = &que->rx.sc_rx_pkts_ring[i];
2786 				if (pkt->pkt_m != NULL) {
2787 					bus_dmamap_sync(sc->sc_dmat,
2788 					    pkt->pkt_map,
2789 					    0, pkt->pkt_map->dm_mapsize,
2790 					    BUS_DMASYNC_POSTREAD);
2791 					bus_dmamap_unload(sc->sc_dmat,
2792 					    pkt->pkt_map);
2793 					m_freem(pkt->pkt_m);
2794 					pkt->pkt_m = NULL;
2795 				}
2796 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2797 			}
2798 
2799 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2800 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2801 			que->rx.sc_rx_pkts_ring = NULL;
2802 		}
2803 
2804 		if (que->rx.fmp != NULL) {
2805 			m_freem(que->rx.fmp);
2806 			que->rx.fmp = NULL;
2807 			que->rx.lmp = NULL;
2808 		}
2809 	}
2810 }
2811 
2812 int
2813 em_rxfill(struct em_queue *que)
2814 {
2815 	struct em_softc *sc = que->sc;
2816 	u_int slots;
2817 	int post = 0;
2818 	int i;
2819 
2820 	i = que->rx.sc_rx_desc_head;
2821 
2822 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2823 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2824 	    BUS_DMASYNC_POSTWRITE);
2825 
2826 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2827 	    slots > 0; slots--) {
2828 		if (++i == sc->sc_rx_slots)
2829 			i = 0;
2830 
2831 		if (em_get_buf(que, i) != 0)
2832 			break;
2833 
2834 		que->rx.sc_rx_desc_head = i;
2835 		post = 1;
2836 	}
2837 
2838 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2839 
2840 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2841 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2842 	    BUS_DMASYNC_PREWRITE);
2843 
2844 	return (post);
2845 }
2846 
2847 void
2848 em_rxrefill(void *arg)
2849 {
2850 	struct em_queue *que = arg;
2851 	struct em_softc *sc = que->sc;
2852 
2853 	if (em_rxfill(que))
2854 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2855 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2856 		timeout_add(&que->rx_refill, 1);
2857 }
2858 
2859 /*********************************************************************
2860  *
2861  *  This routine executes in interrupt context. It replenishes
2862  *  the mbufs in the descriptor ring and passes data which has been
2863  *  DMA'ed into host memory up to the upper layer.
2864  *
2865  *********************************************************************/
2866 int
2867 em_rxeof(struct em_queue *que)
2868 {
2869 	struct em_softc	    *sc = que->sc;
2870 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2871 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2872 	struct mbuf	    *m;
2873 	u_int8_t	    accept_frame = 0;
2874 	u_int8_t	    eop = 0;
2875 	u_int16_t	    len, desc_len, prev_len_adj;
2876 	int		    i, rv = 0;
2877 
2878 	/* Pointer to the receive descriptor being examined. */
2879 	struct em_rx_desc   *desc;
2880 	struct em_packet    *pkt;
2881 	u_int8_t	    status;
2882 
2883 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2884 		return (0);
2885 
2886 	i = que->rx.sc_rx_desc_tail;
2887 
2888 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2889 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2890 	    BUS_DMASYNC_POSTREAD);
2891 
2892 	do {
2893 		m = NULL;
2894 
2895 		pkt = &que->rx.sc_rx_pkts_ring[i];
2896 		desc = &que->rx.sc_rx_desc_ring[i];
2897 
2898 		status = desc->status;
2899 		if (!ISSET(status, E1000_RXD_STAT_DD))
2900 			break;
2901 
2902 		/* pull the mbuf off the ring */
2903 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2904 		    0, pkt->pkt_map->dm_mapsize,
2905 		    BUS_DMASYNC_POSTREAD);
2906 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2907 		m = pkt->pkt_m;
2908 		pkt->pkt_m = NULL;
2909 
2910 		KASSERT(m != NULL);
2911 
2912 		if_rxr_put(&que->rx.sc_rx_ring, 1);
2913 		rv = 1;
2914 
2915 		accept_frame = 1;
2916 		prev_len_adj = 0;
2917 		desc_len = letoh16(desc->length);
2918 
2919 		if (status & E1000_RXD_STAT_EOP) {
2920 			eop = 1;
2921 			if (desc_len < ETHER_CRC_LEN) {
2922 				len = 0;
2923 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2924 			} else if (sc->hw.mac_type == em_i210 ||
2925 			    sc->hw.mac_type == em_i350)
2926 				len = desc_len;
2927 			else
2928 				len = desc_len - ETHER_CRC_LEN;
2929 		} else {
2930 			eop = 0;
2931 			len = desc_len;
2932 		}
2933 
2934 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2935 			u_int8_t last_byte;
2936 			u_int32_t pkt_len = desc_len;
2937 
2938 			if (que->rx.fmp != NULL)
2939 				pkt_len += que->rx.fmp->m_pkthdr.len;
2940 
2941 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
2942 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
2943 			    pkt_len, last_byte)) {
2944 #if NKSTAT > 0
2945 				em_tbi_adjust_stats(sc,
2946 				    pkt_len, sc->hw.mac_addr);
2947 #endif
2948 				if (len > 0)
2949 					len--;
2950 			} else
2951 				accept_frame = 0;
2952 		}
2953 
2954 		if (accept_frame) {
2955 			/* Assign correct length to the current fragment */
2956 			m->m_len = len;
2957 
2958 			if (que->rx.fmp == NULL) {
2959 				m->m_pkthdr.len = m->m_len;
2960 				que->rx.fmp = m;	 /* Store the first mbuf */
2961 				que->rx.lmp = m;
2962 			} else {
2963 				/* Chain mbufs together */
2964 				m->m_flags &= ~M_PKTHDR;
2965 				/*
2966 				 * Adjust length of previous mbuf in chain if
2967 				 * we received less than 4 bytes in the last
2968 				 * descriptor.
2969 				 */
2970 				if (prev_len_adj > 0) {
2971 					que->rx.lmp->m_len -= prev_len_adj;
2972 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
2973 				}
2974 				que->rx.lmp->m_next = m;
2975 				que->rx.lmp = m;
2976 				que->rx.fmp->m_pkthdr.len += m->m_len;
2977 			}
2978 
2979 			if (eop) {
2980 				m = que->rx.fmp;
2981 
2982 				em_receive_checksum(sc, desc, m);
2983 #if NVLAN > 0
2984 				if (desc->status & E1000_RXD_STAT_VP) {
2985 					m->m_pkthdr.ether_vtag =
2986 					    letoh16(desc->special);
2987 					m->m_flags |= M_VLANTAG;
2988 				}
2989 #endif
2990 				ml_enqueue(&ml, m);
2991 
2992 				que->rx.fmp = NULL;
2993 				que->rx.lmp = NULL;
2994 			}
2995 		} else {
2996 			que->rx.dropped_pkts++;
2997 
2998 			if (que->rx.fmp != NULL) {
2999 				m_freem(que->rx.fmp);
3000 				que->rx.fmp = NULL;
3001 				que->rx.lmp = NULL;
3002 			}
3003 
3004 			m_freem(m);
3005 		}
3006 
3007 		/* Advance our pointers to the next descriptor. */
3008 		if (++i == sc->sc_rx_slots)
3009 			i = 0;
3010 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3011 
3012 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3013 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3014 	    BUS_DMASYNC_PREREAD);
3015 
3016 	que->rx.sc_rx_desc_tail = i;
3017 
3018 	if (ifiq_input(&ifp->if_rcv, &ml))
3019 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3020 
3021 	return (rv);
3022 }
3023 
3024 /*********************************************************************
3025  *
3026  *  Verify that the hardware indicated that the checksum is valid.
3027  *  Inform the stack about the status of the checksum so that the
3028  *  stack doesn't spend time verifying it.
3029  *
3030  *********************************************************************/
3031 void
3032 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3033     struct mbuf *mp)
3034 {
3035 	/* 82543 or newer only */
3036 	if ((sc->hw.mac_type < em_82543) ||
3037 	    /* Ignore Checksum bit is set */
3038 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3039 		mp->m_pkthdr.csum_flags = 0;
3040 		return;
3041 	}
3042 
3043 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3044 		/* Did it pass? */
3045 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3046 			/* IP Checksum Good */
3047 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3048 
3049 		} else
3050 			mp->m_pkthdr.csum_flags = 0;
3051 	}
3052 
3053 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3054 		/* Did it pass? */
3055 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3056 			mp->m_pkthdr.csum_flags |=
3057 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3058 	}
3059 }
3060 
3061 /*
3062  * This turns on hardware offload of VLAN tag
3063  * insertion and stripping.
3064  */
3065 void
3066 em_enable_hw_vlans(struct em_softc *sc)
3067 {
3068 	uint32_t ctrl;
3069 
3070 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3071 	ctrl |= E1000_CTRL_VME;
3072 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3073 }
3074 
3075 void
3076 em_enable_intr(struct em_softc *sc)
3077 {
3078 	uint32_t mask;
3079 
3080 	if (sc->msix) {
3081 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3082 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3083 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3084 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3085 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3086 	} else
3087 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3088 }
3089 
3090 void
3091 em_disable_intr(struct em_softc *sc)
3092 {
3093 	/*
3094 	 * The first version of 82542 had an erratum where, when link
3095 	 * was forced, it would stay up even if the cable was disconnected.
3096 	 * Sequence errors were used to detect the disconnect and then
3097 	 * the driver would unforce the link.  This code is in the ISR.
3098 	 * For this to work correctly the Sequence error interrupt had
3099 	 * to be enabled all the time.
3100 	 */
3101 	if (sc->msix) {
3102 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3103 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3104 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3105 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3106 	else
3107 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3108 }
3109 
3110 void
3111 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3112 {
3113 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3114 	pcireg_t val;
3115 
3116 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3117 	if (reg & 0x2) {
3118 		val &= 0x0000ffff;
3119 		val |= (*value << 16);
3120 	} else {
3121 		val &= 0xffff0000;
3122 		val |= *value;
3123 	}
3124 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3125 }
3126 
3127 void
3128 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3129 {
3130 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3131 	pcireg_t val;
3132 
3133 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3134 	if (reg & 0x2)
3135 		*value = (val >> 16) & 0xffff;
3136 	else
3137 		*value = val & 0xffff;
3138 }
3139 
3140 void
3141 em_pci_set_mwi(struct em_hw *hw)
3142 {
3143 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3144 
3145 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3146 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3147 }
3148 
3149 void
3150 em_pci_clear_mwi(struct em_hw *hw)
3151 {
3152 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3153 
3154 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3155 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3156 }
3157 
3158 /*
3159  * We may eventually really do this, but it's unnecessary
3160  * for now so we just return unsupported.
3161  */
3162 int32_t
3163 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3164 {
3165 	return -E1000_NOT_IMPLEMENTED;
3166 }
3167 
3168 /*********************************************************************
3169 * 82544 Coexistence issue workaround.
3170 *    There are 2 issues.
3171 *       1. Transmit Hang issue.
3172 *    To detect this issue, the following equation can be used:
3173 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3174 *          If SUM[3:0] is in between 1 to 4, we will have this issue.
3175 *
3176 *       2. DAC issue.
3177 *    To detect this issue, the following equation can be used:
3178 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3179 *          If SUM[3:0] is in between 9 to c, we will have this issue.
3180 *
3181 *
3182 *    WORKAROUND:
3183 *          Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c (DAC)
3184 *
3185 *** *********************************************************************/
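/*
 * Illustrative example (hypothetical values): for a buffer ending at an
 * address whose low bits are 0x2 with length 0x12, SUM[3:0] =
 * (0x2 + 0x2) & 0xF = 0x4, which falls in the 1..4 hang range, so
 * em_fill_descriptors() below splits it into one descriptor of
 * length - 4 bytes plus a trailing 4-byte descriptor instead of
 * returning a single element.
 */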
3186 u_int32_t
3187 em_fill_descriptors(u_int64_t address, u_int32_t length,
3188     PDESC_ARRAY desc_array)
3189 {
3190         /* The issue is sensitive to both length and address, so check
3191          * the address first. */
3192         u_int32_t safe_terminator;
3193         if (length <= 4) {
3194                 desc_array->descriptor[0].address = address;
3195                 desc_array->descriptor[0].length = length;
3196                 desc_array->elements = 1;
3197                 return desc_array->elements;
3198         }
3199         safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3200         /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3201         if (safe_terminator == 0 ||
3202             (safe_terminator > 4 &&
3203              safe_terminator < 9) ||
3204             (safe_terminator > 0xC &&
3205              safe_terminator <= 0xF)) {
3206                 desc_array->descriptor[0].address = address;
3207                 desc_array->descriptor[0].length = length;
3208                 desc_array->elements = 1;
3209                 return desc_array->elements;
3210         }
3211 
3212         desc_array->descriptor[0].address = address;
3213         desc_array->descriptor[0].length = length - 4;
3214         desc_array->descriptor[1].address = address + (length - 4);
3215         desc_array->descriptor[1].length = 4;
3216         desc_array->elements = 2;
3217         return desc_array->elements;
3218 }
3219 
3220 /*
3221  * Disable the L0S and L1 LINK states.
3222  */
3223 void
3224 em_disable_aspm(struct em_softc *sc)
3225 {
3226 	int offset;
3227 	pcireg_t val;
3228 
3229 	switch (sc->hw.mac_type) {
3230 		case em_82571:
3231 		case em_82572:
3232 		case em_82573:
3233 		case em_82574:
3234 			break;
3235 		default:
3236 			return;
3237 	}
3238 
3239 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3240 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3241 		return;
3242 
3243 	/* Disable PCIe Active State Power Management (ASPM). */
3244 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3245 	    offset + PCI_PCIE_LCSR);
3246 
3247 	switch (sc->hw.mac_type) {
3248 		case em_82571:
3249 		case em_82572:
3250 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3251 			break;
3252 		case em_82573:
3253 		case em_82574:
3254 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3255 			    PCI_PCIE_LCSR_ASPM_L1);
3256 			break;
3257 		default:
3258 			break;
3259 	}
3260 
3261 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3262 	    offset + PCI_PCIE_LCSR, val);
3263 }
3264 
3265 /*
3266  * em_flush_tx_ring - remove all descriptors from the tx_ring
3267  *
3268  * We want to clear all pending descriptors from the TX ring.
3269  * zeroing happens when the HW reads the regs. We assign the ring itself as
3270  * Zeroing happens when the HW reads the regs. We assign the ring itself as
3271  * the data of the next descriptor. We don't care about the data since we are
3272  * about to reset the HW.
3273 void
3274 em_flush_tx_ring(struct em_queue *que)
3275 {
3276 	struct em_softc		*sc = que->sc;
3277 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3278 	uint16_t		 size = 512;
3279 	struct em_tx_desc	*txd;
3280 
3281 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3282 
3283 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3284 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3285 
3286 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3287 
3288 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3289 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3290 	txd->lower.data = htole32(txd_lower | size);
3291 	txd->upper.data = 0;
3292 
3293 	/* flush descriptors to memory before notifying the HW */
3294 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3295 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3296 
3297 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3298 		que->tx.sc_tx_desc_head = 0;
3299 
3300 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3301 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3302 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3303 	usec_delay(250);
3304 }
3305 
3306 /*
3307  * em_flush_rx_ring - remove all descriptors from the rx_ring
3308  *
3309  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3310  */
3311 void
3312 em_flush_rx_ring(struct em_queue *que)
3313 {
3314 	uint32_t	rctl, rxdctl;
3315 	struct em_softc	*sc = que->sc;
3316 
3317 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3318 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3319 	E1000_WRITE_FLUSH(&sc->hw);
3320 	usec_delay(150);
3321 
3322 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3323 	/* zero the lower 14 bits (prefetch and host thresholds) */
3324 	rxdctl &= 0xffffc000;
3325 	/*
3326 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3327 	 * and make sure the granularity is "descriptors" and not "cache lines"
3328 	 */
3329 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3330 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3331 
3332 	/* momentarily enable the RX ring for the changes to take effect */
3333 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3334 	E1000_WRITE_FLUSH(&sc->hw);
3335 	usec_delay(150);
3336 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3337 }
3338 
3339 /*
3340  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3341  *
3342  * In i219, the descriptor rings must be emptied before resetting the HW
3343  * or before changing the device state to D3 during runtime (runtime PM).
3344  *
3345  * Failure to do this will cause the HW to enter a unit hang state which can
3346  * only be released by a PCI reset of the device.
3347  *
3348  */
3349 void
3350 em_flush_desc_rings(struct em_softc *sc)
3351 {
3352 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3353 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3354 	uint32_t		 fextnvm11, tdlen;
3355 	uint16_t		 hang_state;
3356 
3357 	/* First, disable MULR fix in FEXTNVM11 */
3358 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3359 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3360 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3361 
3362 	/* do nothing if we're not in a faulty state, or if the queue is empty */
3363 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3364 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3365 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3366 		return;
3367 	em_flush_tx_ring(que);
3368 
3369 	/* recheck, maybe the fault is caused by the rx ring */
3370 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3371 	if (hang_state & FLUSH_DESC_REQUIRED)
3372 		em_flush_rx_ring(que);
3373 }
3374 
3375 int
3376 em_allocate_legacy(struct em_softc *sc)
3377 {
3378 	pci_intr_handle_t	 ih;
3379 	const char		*intrstr = NULL;
3380 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3381 	pci_chipset_tag_t	 pc = pa->pa_pc;
3382 
3383 	if (pci_intr_map_msi(pa, &ih)) {
3384 		if (pci_intr_map(pa, &ih)) {
3385 			printf(": couldn't map interrupt\n");
3386 			return (ENXIO);
3387 		}
3388 		sc->legacy_irq = 1;
3389 	}
3390 
3391 	intrstr = pci_intr_string(pc, ih);
3392 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3393 	    em_intr, sc, DEVNAME(sc));
3394 	if (sc->sc_intrhand == NULL) {
3395 		printf(": couldn't establish interrupt");
3396 		if (intrstr != NULL)
3397 			printf(" at %s", intrstr);
3398 		printf("\n");
3399 		return (ENXIO);
3400 	}
3401 	printf(": %s", intrstr);
3402 
3403 	return (0);
3404 }
3405 
3406 #if NKSTAT > 0
3407 /* this is used to look up the array of kstats quickly */
3408 enum em_stat {
3409 	em_stat_crcerrs,
3410 	em_stat_algnerrc,
3411 	em_stat_symerrs,
3412 	em_stat_rxerrc,
3413 	em_stat_mpc,
3414 	em_stat_scc,
3415 	em_stat_ecol,
3416 	em_stat_mcc,
3417 	em_stat_latecol,
3418 	em_stat_colc,
3419 	em_stat_dc,
3420 	em_stat_tncrs,
3421 	em_stat_sec,
3422 	em_stat_cexterr,
3423 	em_stat_rlec,
3424 	em_stat_xonrxc,
3425 	em_stat_xontxc,
3426 	em_stat_xoffrxc,
3427 	em_stat_xofftxc,
3428 	em_stat_fcruc,
3429 	em_stat_prc64,
3430 	em_stat_prc127,
3431 	em_stat_prc255,
3432 	em_stat_prc511,
3433 	em_stat_prc1023,
3434 	em_stat_prc1522,
3435 	em_stat_gprc,
3436 	em_stat_bprc,
3437 	em_stat_mprc,
3438 	em_stat_gptc,
3439 	em_stat_gorc,
3440 	em_stat_gotc,
3441 	em_stat_rnbc,
3442 	em_stat_ruc,
3443 	em_stat_rfc,
3444 	em_stat_roc,
3445 	em_stat_rjc,
3446 	em_stat_mgtprc,
3447 	em_stat_mgtpdc,
3448 	em_stat_mgtptc,
3449 	em_stat_tor,
3450 	em_stat_tot,
3451 	em_stat_tpr,
3452 	em_stat_tpt,
3453 	em_stat_ptc64,
3454 	em_stat_ptc127,
3455 	em_stat_ptc255,
3456 	em_stat_ptc511,
3457 	em_stat_ptc1023,
3458 	em_stat_ptc1522,
3459 	em_stat_mptc,
3460 	em_stat_bptc,
3461 #if 0
3462 	em_stat_tsctc,
3463 	em_stat_tsctf,
3464 #endif
3465 
3466 	em_stat_count,
3467 };
3468 
3469 struct em_counter {
3470 	const char		*name;
3471 	enum kstat_kv_unit	 unit;
3472 	uint32_t		 reg;
3473 };
3474 
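/*
 * Entries with a zero register offset are either 64-bit counters or only
 * exist on newer MAC types; em_kstat_read() handles them explicitly.
 */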
3475 static const struct em_counter em_counters[em_stat_count] = {
3476 	[em_stat_crcerrs] =
3477 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3478 	[em_stat_algnerrc] = /* >= em_82543 */
3479 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3480 	[em_stat_symerrs] = /* >= em_82543 */
3481 	    { "rx symbol errs",	KSTAT_KV_U_PACKETS,	0 },
3482 	[em_stat_rxerrc] =
3483 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3484 	[em_stat_mpc] =
3485 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3486 	[em_stat_scc] =
3487 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3488 	[em_stat_ecol] =
3489 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3490 	[em_stat_mcc] =
3491 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3492 	[em_stat_latecol] =
3493 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3494 	[em_stat_colc] =
3495 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3496 	[em_stat_dc] =
3497 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3498 	[em_stat_tncrs] = /* >= em_82543 */
3499 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3500 	[em_stat_sec] =
3501 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3502 	[em_stat_cexterr] = /* >= em_82543 */
3503 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3504 	[em_stat_rlec] =
3505 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3506 	[em_stat_xonrxc] =
3507 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3508 	[em_stat_xontxc] =
3509 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3510 	[em_stat_xoffrxc] =
3511 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3512 	[em_stat_xofftxc] =
3513 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3514 	[em_stat_fcruc] =
3515 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3516 	[em_stat_prc64] =
3517 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3518 	[em_stat_prc127] =
3519 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3520 	[em_stat_prc255] =
3521 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3522 	[em_stat_prc511] =
3523 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3524 	[em_stat_prc1023] =
3525 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3526 	[em_stat_prc1522] =
3527 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3528 	[em_stat_gprc] =
3529 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3530 	[em_stat_bprc] =
3531 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3532 	[em_stat_mprc] =
3533 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3534 	[em_stat_gptc] =
3535 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3536 	[em_stat_gorc] = /* 64bit */
3537 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3538 	[em_stat_gotc] = /* 64bit */
3539 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3540 	[em_stat_rnbc] =
3541 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3542 	[em_stat_ruc] =
3543 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3544 	[em_stat_rfc] =
3545 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3546 	[em_stat_roc] =
3547 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3548 	[em_stat_rjc] =
3549 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3550 	[em_stat_mgtprc] =
3551 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3552 	[em_stat_mgtpdc] =
3553 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3554 	[em_stat_mgtptc] =
3555 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3556 	[em_stat_tor] = /* 64bit */
3557 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3558 	[em_stat_tot] = /* 64bit */
3559 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3560 	[em_stat_tpr] =
3561 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3562 	[em_stat_tpt] =
3563 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3564 	[em_stat_ptc64] =
3565 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3566 	[em_stat_ptc127] =
3567 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3568 	[em_stat_ptc255] =
3569 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3570 	[em_stat_ptc511] =
3571 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3572 	[em_stat_ptc1023] =
3573 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3574 	[em_stat_ptc1522] =
3575 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3576 	[em_stat_mptc] =
3577 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3578 	[em_stat_bptc] =
3579 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3580 };
3581 
3582 /**********************************************************************
3583  *
3584  *  Update the board statistics counters.
3585  *
3586  **********************************************************************/
3587 int
3588 em_kstat_read(struct kstat *ks)
3589 {
3590 	struct em_softc *sc = ks->ks_softc;
3591 	struct em_hw *hw = &sc->hw;
3592 	struct kstat_kv *kvs = ks->ks_data;
3593 	uint32_t lo, hi;
3594 	unsigned int i;
3595 
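	/* Accumulate every counter that maps directly to a register. */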
3596 	for (i = 0; i < nitems(em_counters); i++) {
3597 		const struct em_counter *c = &em_counters[i];
3598 		if (c->reg == 0)
3599 			continue;
3600 
3601 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3602 		    E1000_REG_TR(hw, c->reg)); /* wtf */
3603 	}
3604 
3605 	/* Handle the exceptions. */
3606 
3607 	if (sc->hw.mac_type >= em_82543) {
3608 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3609 		    E1000_READ_REG(hw, ALGNERRC);
3610 		kstat_kv_u64(&kvs[em_stat_symerrs]) +=
3611 		    E1000_READ_REG(hw, SYMERRS);
3612 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3613 		    E1000_READ_REG(hw, CEXTERR);
3614 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3615 		    E1000_READ_REG(hw, TNCRS);
3616 #if 0
3617 		sc->stats.tsctc +=
3618 		E1000_READ_REG(hw, TSCTC);
3619 		sc->stats.tsctfc +=
3620 		E1000_READ_REG(hw, TSCTFC);
3621 #endif
3622 	}
3623 
3624 	/* For the 64-bit byte counters the low dword must be read first. */
3625 	/* Both registers clear on the read of the high dword */
3626 
3627 	lo = E1000_READ_REG(hw, GORCL);
3628 	hi = E1000_READ_REG(hw, GORCH);
3629 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3630 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3631 
3632 	lo = E1000_READ_REG(hw, GOTCL);
3633 	hi = E1000_READ_REG(hw, GOTCH);
3634 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3635 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3636 
3637 	lo = E1000_READ_REG(hw, TORL);
3638 	hi = E1000_READ_REG(hw, TORH);
3639 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3640 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3641 
3642 	lo = E1000_READ_REG(hw, TOTL);
3643 	hi = E1000_READ_REG(hw, TOTH);
3644 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3645 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3646 
3647 	getnanouptime(&ks->ks_updated);
3648 
3649 	return (0);
3650 }
3651 
3652 void
3653 em_kstat_attach(struct em_softc *sc)
3654 {
3655 	struct kstat *ks;
3656 	struct kstat_kv *kvs;
3657 	unsigned int i;
3658 
3659 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3660 
3661 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3662 	    KSTAT_T_KV, 0);
3663 	if (ks == NULL)
3664 		return;
3665 
3666 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3667 	    M_DEVBUF, M_WAITOK|M_ZERO);
3668 	for (i = 0; i < nitems(em_counters); i++) {
3669 		const struct em_counter *c = &em_counters[i];
3670 		kstat_kv_unit_init(&kvs[i], c->name,
3671 		    KSTAT_KV_T_COUNTER64, c->unit);
3672 	}
3673 
3674 	ks->ks_softc = sc;
3675 	ks->ks_data = kvs;
3676 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3677 	ks->ks_read = em_kstat_read;
3678 	kstat_set_mutex(ks, &sc->kstat_mtx);
3679 
3680 	kstat_install(ks);
3681 }
3682 
3683 /******************************************************************************
3684  * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3685  *****************************************************************************/
3686 void
3687 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3688 {
3689 	struct em_hw *hw = &sc->hw;
3690 	struct kstat *ks = sc->kstat;
3691 	struct kstat_kv *kvs;
3692 
3693 	if (ks == NULL)
3694 		return;
3695 
3696 	/* First adjust the frame length. */
3697 	frame_len--;
3698 
3699 	mtx_enter(&sc->kstat_mtx);
3700 	kvs = ks->ks_data;
3701 
3702 	/*
3703 	 * We need to adjust the statistics counters, since the hardware
3704 	 * counters overcount this packet as a CRC error and undercount the
3705 	 * packet as a good packet
3706 	 */
3707 
3708 	/* This packet should not be counted as a CRC error.	*/
3709 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3710 	/* This packet does count as a Good Packet Received.	*/
3711 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3712 
3713 	/* Adjust the Good Octets received counters		*/
3714 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3715 
3716 	/*
3717 	 * Is this a broadcast or multicast?  Check broadcast first, since
3718 	 * the test for a multicast frame will test positive on a broadcast
3719 	 * frame.
3720 	 */
3721 	if (ETHER_IS_BROADCAST(mac_addr)) {
3722 		/* Broadcast packet */
3723 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3724 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3725 		/* Multicast packet */
3726 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3727 	}
3728 
3729 	if (frame_len == hw->max_frame_size) {
3730 		/*
3731 		 * In this case, the hardware has overcounted the number of
3732 		 * oversize frames.
3733 		 */
3734 		kstat_kv_u64(&kvs[em_stat_roc])--;
3735 	}
3736 
3737 	/*
3738 	 * Adjust the bin counters when the extra byte put the frame in the
3739 	 * wrong bin. Remember that the frame_len was adjusted above.
3740 	 */
3741 	if (frame_len == 64) {
3742 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3743 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3744 	} else if (frame_len == 127) {
3745 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3746 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3747 	} else if (frame_len == 255) {
3748 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3749 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3750 	} else if (frame_len == 511) {
3751 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3752 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3753 	} else if (frame_len == 1023) {
3754 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3755 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3756 	} else if (frame_len == 1522) {
3757 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3758 	}
3759 
3760 	mtx_leave(&sc->kstat_mtx);
3761 }
3762 #endif /* NKSTAT > 0 */
3763 
3764 #ifndef SMALL_KERNEL
3765 int
3766 em_allocate_msix(struct em_softc *sc)
3767 {
3768 	pci_intr_handle_t	 ih;
3769 	const char		*intrstr = NULL;
3770 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3771 	pci_chipset_tag_t	 pc = pa->pa_pc;
3772 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3773 	int			 vec;
3774 
3775 	if (!em_enable_msix)
3776 		return (ENODEV);
3777 
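	/* MSI-X queue interrupts are only wired up for these newer MAC types. */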
3778 	switch (sc->hw.mac_type) {
3779 	case em_82576:
3780 	case em_82580:
3781 	case em_i350:
3782 	case em_i210:
3783 		break;
3784 	default:
3785 		return (ENODEV);
3786 	}
3787 
3788 	vec = 0;
3789 	if (pci_intr_map_msix(pa, vec, &ih))
3790 		return (ENODEV);
3791 	sc->msix = 1;
3792 
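	/* Vector 0 drives the single queue; que->eims records its EIMS bit. */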
3793 	que->me = vec;
3794 	que->eims = 1 << vec;
3795 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3796 
3797 	intrstr = pci_intr_string(pc, ih);
3798 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3799 	    em_queue_intr_msix, que, que->name);
3800 	if (que->tag == NULL) {
3801 		printf(": couldn't establish interrupt");
3802 		if (intrstr != NULL)
3803 			printf(" at %s", intrstr);
3804 		printf("\n");
3805 		return (ENXIO);
3806 	}
3807 
3808 	/* Set up the link vector; use the last queue vector + 1 */
3809 	vec++;
3810 	sc->msix_linkvec = vec;
3811 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3812 		printf(": couldn't map link vector\n");
3813 		return (ENXIO);
3814 	}
3815 
3816 	intrstr = pci_intr_string(pc, ih);
3817 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3818 	    em_link_intr_msix, sc, DEVNAME(sc));
3819 	if (sc->sc_intrhand == NULL) {
3820 		printf(": couldn't establish interrupt");
3821 		if (intrstr != NULL)
3822 			printf(" at %s", intrstr);
3823 		printf("\n");
3824 		return (ENXIO);
3825 	}
3826 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3827 
3828 	return (0);
3829 }
3830 
3831 /*
3832  * Interrupt handler for a specific queue (not link interrupts). The EICR
3833  * bit that maps to the EIMS bit covers both RX and TX, so we cannot
3834  * distinguish an RX completion from a TX completion and must handle both.
3835  * The bits in EICR are autocleared and we _cannot_ read EICR.
3836  */
3837 int
3838 em_queue_intr_msix(void *vque)
3839 {
3840 	struct em_queue *que = vque;
3841 	struct em_softc *sc = que->sc;
3842 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3843 
3844 	if (ifp->if_flags & IFF_RUNNING) {
3845 		em_txeof(que);
3846 		if (em_rxeof(que))
3847 			em_rxrefill(que);
3848 	}
3849 
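	/*
	 * Writing the queue's bit to EIMS re-enables its vector; with EIAME
	 * set in GPIE the bit may have been auto-masked when the interrupt
	 * fired.
	 */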
3850 	em_enable_queue_intr_msix(que);
3851 
3852 	return (1);
3853 }
3854 
3855 int
3856 em_link_intr_msix(void *arg)
3857 {
3858 	struct em_softc *sc = arg;
3859 	uint32_t icr;
3860 
3861 	icr = E1000_READ_REG(&sc->hw, ICR);
3862 
3863 	/* Link status change */
3864 	if (icr & E1000_ICR_LSC) {
3865 		KERNEL_LOCK();
3866 		sc->hw.get_link_status = 1;
3867 		em_check_for_link(&sc->hw);
3868 		em_update_link_status(sc);
3869 		KERNEL_UNLOCK();
3870 	}
3871 
3872 	/* Re-arm unconditionally */
3873 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3874 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3875 
3876 	return (1);
3877 }
3878 
3879 /*
3880  * Map queues onto MSI-X interrupt vectors.
3881  */
3882 int
3883 em_setup_queues_msix(struct em_softc *sc)
3884 {
3885 	uint32_t ivar, newitr, index;
3886 	struct em_queue *que;
3887 
3888 	KASSERT(sc->msix);
3889 
3890 	/* Put the hardware into MSI-X mode via GPIE (skipped on the 82575) */
3891 	if (sc->hw.mac_type != em_82575)
3892 		E1000_WRITE_REG(&sc->hw, GPIE,
3893 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3894 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3895 
3896 	/* Turn on MSIX */
3897 	switch (sc->hw.mac_type) {
3898 	case em_82580:
3899 	case em_i350:
3900 	case em_i210:
3901 		/* RX entries */
3902 		/*
3903 		 * This maps queues onto MSI-X vectors. The offset calculation
3904 		 * and the check for an odd que->me follow the register layout:
3905 		 * each IVAR register holds the entries for two queues, as the
3906 		 * datasheet describes.
3907 		 */
3908 		FOREACH_QUEUE(sc, que) {
3909 			index = que->me >> 1;
3910 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3911 			if (que->me & 1) {
3912 				ivar &= 0xFF00FFFF;
3913 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3914 			} else {
3915 				ivar &= 0xFFFFFF00;
3916 				ivar |= que->me | E1000_IVAR_VALID;
3917 			}
3918 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3919 		}
3920 
3921 		/* TX entries */
3922 		FOREACH_QUEUE(sc, que) {
3923 			index = que->me >> 1;
3924 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3925 			if (que->me & 1) {
3926 				ivar &= 0x00FFFFFF;
3927 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3928 			} else {
3929 				ivar &= 0xFFFF00FF;
3930 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3931 			}
3932 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3933 			sc->msix_queuesmask |= que->eims;
3934 		}
3935 
3936 		/* And for the link interrupt */
3937 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3938 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3939 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3940 		break;
3941 	case em_82576:
3942 		/* RX entries */
3943 		FOREACH_QUEUE(sc, que) {
3944 			index = que->me & 0x7; /* Each IVAR has two entries */
3945 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3946 			if (que->me < 8) {
3947 				ivar &= 0xFFFFFF00;
3948 				ivar |= que->me | E1000_IVAR_VALID;
3949 			} else {
3950 				ivar &= 0xFF00FFFF;
3951 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3952 			}
3953 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3954 			sc->msix_queuesmask |= que->eims;
3955 		}
3956 		/* TX entries */
3957 		FOREACH_QUEUE(sc, que) {
3958 			index = que->me & 0x7; /* Each IVAR has two entries */
3959 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3960 			if (que->me < 8) {
3961 				ivar &= 0xFFFF00FF;
3962 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3963 			} else {
3964 				ivar &= 0x00FFFFFF;
3965 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3966 			}
3967 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3968 			sc->msix_queuesmask |= que->eims;
3969 		}
3970 
3971 		/* And for the link interrupt */
3972 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3973 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3974 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3975 		break;
3976 	default:
3977 		panic("unsupported mac");
3978 		break;
3979 	}
3980 
3981 	/* Set the starting interrupt rate */
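	/*
	 * The EITR interval field appears to tick in ~250 ns units, hence the
	 * 4,000,000 ticks-per-second scaling; the 0x7FFC mask keeps the value
	 * within the interval field.
	 */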
3982 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
3983 
3984 	if (sc->hw.mac_type == em_82575)
3985 		newitr |= newitr << 16;
3986 	else
3987 		newitr |= E1000_EITR_CNT_IGNR;
3988 
3989 	FOREACH_QUEUE(sc, que)
3990 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
3991 
3992 	return (0);
3993 }
3994 
3995 void
3996 em_enable_queue_intr_msix(struct em_queue *que)
3997 {
3998 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
3999 }
4000 #endif /* !SMALL_KERNEL */
4001 
4002 int
4003 em_allocate_desc_rings(struct em_softc *sc)
4004 {
4005 	struct em_queue *que;
4006 
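	/*
	 * Allocate DMA-safe TX and RX descriptor rings for every queue; ring
	 * sizes follow sc_tx_slots and sc_rx_slots.
	 */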
4007 	FOREACH_QUEUE(sc, que) {
4008 		/* Allocate Transmit Descriptor ring */
4009 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4010 		    &que->tx.sc_tx_dma) != 0) {
4011 			printf("%s: Unable to allocate tx_desc memory\n",
4012 			    DEVNAME(sc));
4013 			return (ENOMEM);
4014 		}
4015 		que->tx.sc_tx_desc_ring =
4016 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4017 
4018 		/* Allocate Receive Descriptor ring */
4019 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4020 		    &que->rx.sc_rx_dma) != 0) {
4021 			printf("%s: Unable to allocate rx_desc memory\n",
4022 			    DEVNAME(sc));
4023 			return (ENOMEM);
4024 		}
4025 		que->rx.sc_rx_desc_ring =
4026 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4027 	}
4028 
4029 	return (0);
4030 }
4031