xref: /openbsd-src/sys/dev/pci/if_em.c (revision f84b1df5a16cdd762c93854218de246e79975d3b)
1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.361 2022/03/11 18:00:45 mpi Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 /*********************************************************************
41  *  Driver version
42  *********************************************************************/
43 
44 #define EM_DRIVER_VERSION	"6.2.9"
45 
46 /*********************************************************************
47  *  PCI Device ID Table
48  *********************************************************************/
49 const struct pci_matchid em_devices[] = {
50 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
225 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
226 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
227 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
228 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
229 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
230 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
231 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
232 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
233 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
234 };
235 
236 /*********************************************************************
237  *  Function prototypes
238  *********************************************************************/
239 int  em_probe(struct device *, void *, void *);
240 void em_attach(struct device *, struct device *, void *);
241 void em_defer_attach(struct device*);
242 int  em_detach(struct device *, int);
243 int  em_activate(struct device *, int);
244 int  em_intr(void *);
245 int  em_allocate_legacy(struct em_softc *);
246 void em_start(struct ifqueue *);
247 int  em_ioctl(struct ifnet *, u_long, caddr_t);
248 void em_watchdog(struct ifnet *);
249 void em_init(void *);
250 void em_stop(void *, int);
251 void em_media_status(struct ifnet *, struct ifmediareq *);
252 int  em_media_change(struct ifnet *);
253 uint64_t  em_flowstatus(struct em_softc *);
254 void em_identify_hardware(struct em_softc *);
255 int  em_allocate_pci_resources(struct em_softc *);
256 void em_free_pci_resources(struct em_softc *);
257 void em_local_timer(void *);
258 int  em_hardware_init(struct em_softc *);
259 void em_setup_interface(struct em_softc *);
260 int  em_setup_transmit_structures(struct em_softc *);
261 void em_initialize_transmit_unit(struct em_softc *);
262 int  em_setup_receive_structures(struct em_softc *);
263 void em_initialize_receive_unit(struct em_softc *);
264 void em_enable_intr(struct em_softc *);
265 void em_disable_intr(struct em_softc *);
266 void em_free_transmit_structures(struct em_softc *);
267 void em_free_receive_structures(struct em_softc *);
268 void em_update_stats_counters(struct em_softc *);
269 void em_disable_aspm(struct em_softc *);
270 void em_txeof(struct em_queue *);
271 int  em_allocate_receive_structures(struct em_softc *);
272 int  em_allocate_transmit_structures(struct em_softc *);
273 int  em_allocate_desc_rings(struct em_softc *);
274 int  em_rxfill(struct em_queue *);
275 void em_rxrefill(void *);
276 int  em_rxeof(struct em_queue *);
277 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
278 			 struct mbuf *);
279 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
280 	    u_int32_t *, u_int32_t *);
281 void em_iff(struct em_softc *);
282 void em_update_link_status(struct em_softc *);
283 int  em_get_buf(struct em_queue *, int);
284 void em_enable_hw_vlans(struct em_softc *);
285 u_int em_encap(struct em_queue *, struct mbuf *);
286 void em_smartspeed(struct em_softc *);
287 int  em_82547_fifo_workaround(struct em_softc *, int);
288 void em_82547_update_fifo_head(struct em_softc *, int);
289 int  em_82547_tx_fifo_reset(struct em_softc *);
290 void em_82547_move_tail(void *arg);
291 void em_82547_move_tail_locked(struct em_softc *);
292 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
293 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
294 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
295 			      PDESC_ARRAY desc_array);
296 void em_flush_tx_ring(struct em_queue *);
297 void em_flush_rx_ring(struct em_queue *);
298 void em_flush_desc_rings(struct em_softc *);
299 int em_get_sffpage(struct em_softc *, struct if_sffpage *);
300 
301 #ifndef SMALL_KERNEL
302 /* MSIX/Multiqueue functions */
303 int  em_allocate_msix(struct em_softc *);
304 int  em_setup_queues_msix(struct em_softc *);
305 int  em_queue_intr_msix(void *);
306 int  em_link_intr_msix(void *);
307 void em_enable_queue_intr_msix(struct em_queue *);
308 #else
309 #define em_allocate_msix(_sc) 	(-1)
310 #endif
311 
312 #if NKSTAT > 0
313 void	em_kstat_attach(struct em_softc *);
314 int	em_kstat_read(struct kstat *);
315 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
316 #endif
317 
318 /*********************************************************************
319  *  OpenBSD Device Interface Entry Points
320  *********************************************************************/
321 
322 const struct cfattach em_ca = {
323 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
324 	em_activate
325 };
326 
327 struct cfdriver em_cd = {
328 	NULL, "em", DV_IFNET
329 };
330 
331 static int em_smart_pwr_down = FALSE;
332 int em_enable_msix = 0;
333 
334 /*********************************************************************
335  *  Device identification routine
336  *
337  *  em_probe determines if the driver should be loaded for the
338  *  adapter, based on the PCI vendor/device id of the adapter.
339  *
340  *  return 0 on no match, positive on match
341  *********************************************************************/
342 
343 int
344 em_probe(struct device *parent, void *match, void *aux)
345 {
346 	INIT_DEBUGOUT("em_probe: begin");
347 
348 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
349 	    nitems(em_devices)));
350 }
351 
352 void
353 em_defer_attach(struct device *self)
354 {
355 	struct em_softc *sc = (struct em_softc *)self;
356 	struct pci_attach_args *pa = &sc->osdep.em_pa;
357 	pci_chipset_tag_t	pc = pa->pa_pc;
358 	void *gcu;
359 
360 	INIT_DEBUGOUT("em_defer_attach: begin");
361 
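	/*
	 * EP80579 (icp_xxxx) MACs depend on a separate GCU device that
	 * attaches on its own; this attachment was deferred until that
	 * device had a chance to show up.
	 */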
362 	if ((gcu = em_lookup_gcu(self)) == 0) {
363 		printf("%s: No GCU found, deferred attachment failed\n",
364 		    DEVNAME(sc));
365 
366 		if (sc->sc_intrhand)
367 			pci_intr_disestablish(pc, sc->sc_intrhand);
368 		sc->sc_intrhand = 0;
369 
370 		em_stop(sc, 1);
371 
372 		em_free_pci_resources(sc);
373 
374 		return;
375 	}
376 
377 	sc->hw.gcu = gcu;
378 
379 	em_attach_miibus(self);
380 
381 	em_setup_interface(sc);
382 
383 	em_setup_link(&sc->hw);
384 
385 	em_update_link_status(sc);
386 }
387 
388 /*********************************************************************
389  *  Device initialization routine
390  *
391  *  The attach entry point is called when the driver is being loaded.
392  *  This routine identifies the type of hardware, allocates all resources
393  *  and initializes the hardware.
394  *
395  *********************************************************************/
396 
397 void
398 em_attach(struct device *parent, struct device *self, void *aux)
399 {
400 	struct pci_attach_args *pa = aux;
401 	struct em_softc *sc;
402 	int defer = 0;
403 
404 	INIT_DEBUGOUT("em_attach: begin");
405 
406 	sc = (struct em_softc *)self;
407 	sc->sc_dmat = pa->pa_dmat;
408 	sc->osdep.em_pa = *pa;
409 
410 	timeout_set(&sc->timer_handle, em_local_timer, sc);
411 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
412 
413 	rw_init(&sc->sfflock, "emsff");
414 
415 	/* Determine hardware revision */
416 	em_identify_hardware(sc);
417 
418 	/*
419 	 * Only use MSI on the newer PCIe parts, with the exception
420 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
421 	 */
422 	if (sc->hw.mac_type <= em_82572)
423 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
424 
425 	/* Parameters (to be read from user) */
426 	if (sc->hw.mac_type >= em_82544) {
427 		sc->sc_tx_slots = EM_MAX_TXD;
428 		sc->sc_rx_slots = EM_MAX_RXD;
429 	} else {
430 		sc->sc_tx_slots = EM_MAX_TXD_82543;
431 		sc->sc_rx_slots = EM_MAX_RXD_82543;
432 	}
433 	sc->tx_int_delay = EM_TIDV;
434 	sc->tx_abs_int_delay = EM_TADV;
435 	sc->rx_int_delay = EM_RDTR;
436 	sc->rx_abs_int_delay = EM_RADV;
437 	sc->hw.autoneg = DO_AUTO_NEG;
438 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
439 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
440 	sc->hw.tbi_compatibility_en = TRUE;
441 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
442 
443 	sc->hw.phy_init_script = 1;
444 	sc->hw.phy_reset_disable = FALSE;
445 
446 #ifndef EM_MASTER_SLAVE
447 	sc->hw.master_slave = em_ms_hw_default;
448 #else
449 	sc->hw.master_slave = EM_MASTER_SLAVE;
450 #endif
451 
452 	/*
453 	 * This controls when hardware reports transmit completion
454 	 * status.
455 	 */
456 	sc->hw.report_tx_early = 1;
457 
458 	if (em_allocate_pci_resources(sc))
459 		goto err_pci;
460 
461 	/* Initialize eeprom parameters */
462 	em_init_eeprom_params(&sc->hw);
463 
464 	/*
465 	 * Set the max frame size assuming standard Ethernet
466 	 * sized frames.
467 	 */
468 	switch (sc->hw.mac_type) {
469 		case em_82573:
470 		{
471 			uint16_t	eeprom_data = 0;
472 
473 			/*
474 			 * 82573 only supports Jumbo frames
475 			 * if ASPM is disabled.
476 			 */
477 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
478 			    1, &eeprom_data);
479 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
480 				sc->hw.max_frame_size = ETHER_MAX_LEN;
481 				break;
482 			}
483 			/* Allow Jumbo frames */
484 			/* FALLTHROUGH */
485 		}
486 		case em_82571:
487 		case em_82572:
488 		case em_82574:
489 		case em_82575:
490 		case em_82576:
491 		case em_82580:
492 		case em_i210:
493 		case em_i350:
494 		case em_ich9lan:
495 		case em_ich10lan:
496 		case em_pch2lan:
497 		case em_pch_lpt:
498 		case em_pch_spt:
499 		case em_pch_cnp:
500 		case em_80003es2lan:
501 			/* 9K Jumbo Frame size */
502 			sc->hw.max_frame_size = 9234;
503 			break;
504 		case em_pchlan:
505 			sc->hw.max_frame_size = 4096;
506 			break;
507 		case em_82542_rev2_0:
508 		case em_82542_rev2_1:
509 		case em_ich8lan:
510 			/* Adapters that do not support Jumbo frames */
511 			sc->hw.max_frame_size = ETHER_MAX_LEN;
512 			break;
513 		default:
514 			sc->hw.max_frame_size =
515 			    MAX_JUMBO_FRAME_SIZE;
516 	}
517 
518 	sc->hw.min_frame_size =
519 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
520 
521 	if (em_allocate_desc_rings(sc) != 0) {
522 		printf("%s: Unable to allocate descriptor ring memory\n",
523 		    DEVNAME(sc));
524 		goto err_pci;
525 	}
526 
527 	/* Initialize the hardware */
528 	if ((defer = em_hardware_init(sc))) {
529 		if (defer == EAGAIN)
530 			config_defer(self, em_defer_attach);
531 		else {
532 			printf("%s: Unable to initialize the hardware\n",
533 			    DEVNAME(sc));
534 			goto err_pci;
535 		}
536 	}
537 
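
	/*
	 * On these multi-port controllers the PCI function number picks
	 * the software/firmware semaphore (swfw) bit used to arbitrate
	 * PHY access for this port.
	 */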
538 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
539 	    sc->hw.mac_type == em_82576 ||
540 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
541 	    sc->hw.mac_type == em_i350) {
542 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
543 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
544 		    E1000_STATUS_FUNC_SHIFT;
545 
546 		switch (sc->hw.bus_func) {
547 		case 0:
548 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
549 			break;
550 		case 1:
551 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
552 			break;
553 		case 2:
554 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
555 			break;
556 		case 3:
557 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
558 			break;
559 		}
560 	} else {
561 		sc->hw.bus_func = 0;
562 	}
563 
564 	/* Copy the permanent MAC address out of the EEPROM */
565 	if (em_read_mac_addr(&sc->hw) < 0) {
566 		printf("%s: EEPROM read error while reading mac address\n",
567 		       DEVNAME(sc));
568 		goto err_pci;
569 	}
570 
571 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
572 
573 	/* Setup OS specific network interface */
574 	if (!defer)
575 		em_setup_interface(sc);
576 
577 	/* Initialize statistics */
578 	em_clear_hw_cntrs(&sc->hw);
579 #if NKSTAT > 0
580 	em_kstat_attach(sc);
581 #endif
582 	sc->hw.get_link_status = 1;
583 	if (!defer)
584 		em_update_link_status(sc);
585 
586 #ifdef EM_DEBUG
587 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
588 #endif
589 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
590 
591 	/* Indicate SOL/IDER usage */
592 	if (em_check_phy_reset_block(&sc->hw))
593 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
594 		    DEVNAME(sc));
595 
596 	/* Identify 82544 on PCI-X */
597 	em_get_bus_info(&sc->hw);
598 	if (sc->hw.bus_type == em_bus_type_pcix &&
599 	    sc->hw.mac_type == em_82544)
600 		sc->pcix_82544 = TRUE;
601 	else
602 		sc->pcix_82544 = FALSE;
603 
604 	sc->hw.icp_xxxx_is_link_up = FALSE;
605 
606 	INIT_DEBUGOUT("em_attach: end");
607 	return;
608 
609 err_pci:
610 	em_free_pci_resources(sc);
611 }
612 
613 /*********************************************************************
614  *  Transmit entry point
615  *
616  *  em_start is called by the stack to initiate a transmit.
617  *  The driver will remain in this routine as long as there are
618  *  packets to transmit and transmit resources are available.
619  *  In case resources are not available, the stack is notified and
620  *  the packet is requeued.
621  **********************************************************************/
622 
623 void
624 em_start(struct ifqueue *ifq)
625 {
626 	struct ifnet *ifp = ifq->ifq_if;
627 	struct em_softc *sc = ifp->if_softc;
628 	u_int head, free, used;
629 	struct mbuf *m;
630 	int post = 0;
631 	struct em_queue *que = sc->queues; /* Use only first queue. */
632 
633 	if (!sc->link_active) {
634 		ifq_purge(ifq);
635 		return;
636 	}
637 
638 	/* calculate free space */
639 	head = que->tx.sc_tx_desc_head;
640 	free = que->tx.sc_tx_desc_tail;
641 	if (free <= head)
642 		free += sc->sc_tx_slots;
643 	free -= head;
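	/*
	 * Illustration: with 256 slots, head = 200 and tail = 10,
	 * free = 10 + 256 - 200 = 66 descriptors are still available
	 * between the producer (head) and the unreclaimed tail.
	 */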
644 
645 	if (sc->hw.mac_type != em_82547) {
646 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
647 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
648 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
649 	}
650 
651 	for (;;) {
652 		/* use 2 because cksum setup can use an extra slot */
653 		if (EM_MAX_SCATTER + 2 > free) {
654 			ifq_set_oactive(ifq);
655 			break;
656 		}
657 
658 		m = ifq_dequeue(ifq);
659 		if (m == NULL)
660 			break;
661 
662 		used = em_encap(que, m);
663 		if (used == 0) {
664 			m_freem(m);
665 			continue;
666 		}
667 
668 		KASSERT(used <= free);
669 
670 		free -= used;
671 
672 #if NBPFILTER > 0
673 		/* Send a copy of the frame to the BPF listener */
674 		if (ifp->if_bpf)
675 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
676 #endif
677 
678 		/* Set timeout in case hardware has problems transmitting */
679 		ifp->if_timer = EM_TX_TIMEOUT;
680 
681 		if (sc->hw.mac_type == em_82547) {
682 			int len = m->m_pkthdr.len;
683 
684 			if (sc->link_duplex == HALF_DUPLEX)
685 				em_82547_move_tail_locked(sc);
686 			else {
687 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
688 				    que->tx.sc_tx_desc_head);
689 				em_82547_update_fifo_head(sc, len);
690 			}
691 		}
692 
693 		post = 1;
694 	}
695 
696 	if (sc->hw.mac_type != em_82547) {
697 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
698 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
699 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
700 		/*
701 		 * Advance the Transmit Descriptor Tail (Tdt),
702 		 * this tells the E1000 that this frame is
703 		 * available to transmit.
704 		 */
705 		if (post)
706 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
707 			    que->tx.sc_tx_desc_head);
708 	}
709 }
710 
711 /*********************************************************************
712  *  Ioctl entry point
713  *
714  *  em_ioctl is called when the user wants to configure the
715  *  interface.
716  *
717  *  return 0 on success, positive on failure
718  **********************************************************************/
719 
720 int
721 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
722 {
723 	int		error = 0;
724 	struct ifreq   *ifr = (struct ifreq *) data;
725 	struct em_softc *sc = ifp->if_softc;
726 	int s;
727 
728 	s = splnet();
729 
730 	switch (command) {
731 	case SIOCSIFADDR:
732 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
733 			       "Addr)");
734 		if (!(ifp->if_flags & IFF_UP)) {
735 			ifp->if_flags |= IFF_UP;
736 			em_init(sc);
737 		}
738 		break;
739 
740 	case SIOCSIFFLAGS:
741 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
742 		if (ifp->if_flags & IFF_UP) {
743 			if (ifp->if_flags & IFF_RUNNING)
744 				error = ENETRESET;
745 			else
746 				em_init(sc);
747 		} else {
748 			if (ifp->if_flags & IFF_RUNNING)
749 				em_stop(sc, 0);
750 		}
751 		break;
752 
753 	case SIOCSIFMEDIA:
754 		/* Check SOL/IDER usage */
755 		if (em_check_phy_reset_block(&sc->hw)) {
756 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
757 			    DEVNAME(sc));
758 			break;
759 		}
760 	case SIOCGIFMEDIA:
761 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
762 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
763 		break;
764 
765 	case SIOCGIFRXR:
766 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
767 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
768 		break;
769 
770 	case SIOCGIFSFFPAGE:
771 		error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
772 		if (error != 0)
773 			break;
774 
775 		error = em_get_sffpage(sc, (struct if_sffpage *)data);
776 		rw_exit(&sc->sfflock);
777 		break;
778 
779 	default:
780 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
781 	}
782 
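	/*
	 * ENETRESET means only the receive filters changed while the
	 * interface was running: reprogram them via em_iff() instead of
	 * doing a full reinit.
	 */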
783 	if (error == ENETRESET) {
784 		if (ifp->if_flags & IFF_RUNNING) {
785 			em_disable_intr(sc);
786 			em_iff(sc);
787 			if (sc->hw.mac_type == em_82542_rev2_0)
788 				em_initialize_receive_unit(sc);
789 			em_enable_intr(sc);
790 		}
791 		error = 0;
792 	}
793 
794 	splx(s);
795 	return (error);
796 }
797 
798 /*********************************************************************
799  *  Watchdog entry point
800  *
801  *  This routine is called whenever the hardware stops transmitting.
802  *
803  **********************************************************************/
804 
805 void
806 em_watchdog(struct ifnet *ifp)
807 {
808 	struct em_softc *sc = ifp->if_softc;
809 	struct em_queue *que = sc->queues; /* Use only first queue. */
810 
811 
812 	/* If we are in this routine because of pause frames, then
813 	 * don't reset the hardware.
814 	 */
815 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
816 		ifp->if_timer = EM_TX_TIMEOUT;
817 		return;
818 	}
819 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
820 	    DEVNAME(sc),
821 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
822 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
823 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
824 
825 	em_init(sc);
826 
827 	sc->watchdog_events++;
828 }
829 
830 /*********************************************************************
831  *  Init entry point
832  *
833  *  This routine is used in two ways. It is used by the stack as
834  *  the init entry point in the network interface structure. It is also used
835  *  by the driver as a hw/sw initialization routine to get to a
836  *  consistent state.
837  *
838  **********************************************************************/
839 
840 void
841 em_init(void *arg)
842 {
843 	struct em_softc *sc = arg;
844 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
845 	uint32_t	pba;
846 	int s;
847 
848 	s = splnet();
849 
850 	INIT_DEBUGOUT("em_init: begin");
851 
852 	em_stop(sc, 0);
853 
854 	/*
855 	 * Packet Buffer Allocation (PBA)
856 	 * Writing PBA sets the receive portion of the buffer;
857 	 * the remainder is used for the transmit buffer.
858 	 *
859 	 * Devices before the 82547 had a Packet Buffer of 64K.
860 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
861 	 * After the 82547 the buffer was reduced to 40K.
862 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
863 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
864 	 */
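	/*
	 * The PBA value is in kilobytes: e.g. writing 48 (E1000_PBA_48K)
	 * on a part with a 64K packet buffer gives the receive FIFO 48K
	 * and leaves 64 - 48 = 16K for transmit.
	 */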
865 	switch (sc->hw.mac_type) {
866 	case em_82547:
867 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
868 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
869 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
870 		else
871 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
872 		sc->tx_fifo_head = 0;
873 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
874 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
875 		break;
876 	case em_82571:
877 	case em_82572: /* Total Packet Buffer on these is 48k */
878 	case em_82575:
879 	case em_82576:
880 	case em_82580:
881 	case em_80003es2lan:
882 	case em_i350:
883 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
884 		break;
885 	case em_i210:
886 		pba = E1000_PBA_34K;
887 		break;
888 	case em_82573: /* 82573: Total Packet Buffer is 32K */
889 		/* Jumbo frames not supported */
890 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
891 		break;
892 	case em_82574: /* Total Packet Buffer is 40k */
893 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
894 		break;
895 	case em_ich8lan:
896 		pba = E1000_PBA_8K;
897 		break;
898 	case em_ich9lan:
899 	case em_ich10lan:
900 		/* Boost Receive side for jumbo frames */
901 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
902 			pba = E1000_PBA_14K;
903 		else
904 			pba = E1000_PBA_10K;
905 		break;
906 	case em_pchlan:
907 	case em_pch2lan:
908 	case em_pch_lpt:
909 	case em_pch_spt:
910 	case em_pch_cnp:
911 		pba = E1000_PBA_26K;
912 		break;
913 	default:
914 		/* Devices before 82547 had a Packet Buffer of 64K.   */
915 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
916 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
917 		else
918 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
919 	}
920 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
921 	E1000_WRITE_REG(&sc->hw, PBA, pba);
922 
923 	/* Get the latest mac address; the user may have configured a LAA */
924 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
925 
926 	/* Initialize the hardware */
927 	if (em_hardware_init(sc)) {
928 		printf("%s: Unable to initialize the hardware\n",
929 		       DEVNAME(sc));
930 		splx(s);
931 		return;
932 	}
933 	em_update_link_status(sc);
934 
935 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
936 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
937 		em_enable_hw_vlans(sc);
938 
939 	/* Prepare transmit descriptors and buffers */
940 	if (em_setup_transmit_structures(sc)) {
941 		printf("%s: Could not setup transmit structures\n",
942 		       DEVNAME(sc));
943 		em_stop(sc, 0);
944 		splx(s);
945 		return;
946 	}
947 	em_initialize_transmit_unit(sc);
948 
949 	/* Prepare receive descriptors and buffers */
950 	if (em_setup_receive_structures(sc)) {
951 		printf("%s: Could not setup receive structures\n",
952 		       DEVNAME(sc));
953 		em_stop(sc, 0);
954 		splx(s);
955 		return;
956 	}
957 	em_initialize_receive_unit(sc);
958 
959 #ifndef SMALL_KERNEL
960 	if (sc->msix) {
961 		if (em_setup_queues_msix(sc)) {
962 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
963 			splx(s);
964 			return;
965 		}
966 	}
967 #endif
968 
969 	/* Program promiscuous mode and multicast filters. */
970 	em_iff(sc);
971 
972 	ifp->if_flags |= IFF_RUNNING;
973 	ifq_clr_oactive(&ifp->if_snd);
974 
975 	timeout_add_sec(&sc->timer_handle, 1);
976 	em_clear_hw_cntrs(&sc->hw);
977 	em_enable_intr(sc);
978 
979 	/* Don't reset the phy next time init gets called */
980 	sc->hw.phy_reset_disable = TRUE;
981 
982 	splx(s);
983 }
984 
985 /*********************************************************************
986  *
987  *  Interrupt Service routine
988  *
989  **********************************************************************/
990 int
991 em_intr(void *arg)
992 {
993 	struct em_softc	*sc = arg;
994 	struct em_queue *que = sc->queues; /* single queue */
995 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
996 	u_int32_t	reg_icr, test_icr;
997 
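	/*
	 * Reading ICR acknowledges the interrupt.  On 82571 and newer
	 * parts the INT_ASSERTED bit says whether the interrupt really
	 * came from this device; on older parts any nonzero ICR value
	 * is taken as ours.
	 */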
998 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
999 	if (sc->hw.mac_type >= em_82571)
1000 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
1001 	if (!test_icr)
1002 		return (0);
1003 
1004 	if (ifp->if_flags & IFF_RUNNING) {
1005 		em_txeof(que);
1006 		if (em_rxeof(que))
1007 			em_rxrefill(que);
1008 	}
1009 
1010 	/* Link status change */
1011 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1012 		KERNEL_LOCK();
1013 		sc->hw.get_link_status = 1;
1014 		em_check_for_link(&sc->hw);
1015 		em_update_link_status(sc);
1016 		KERNEL_UNLOCK();
1017 	}
1018 
1019 	return (1);
1020 }
1021 
1022 /*********************************************************************
1023  *
1024  *  Media Ioctl callback
1025  *
1026  *  This routine is called whenever the user queries the status of
1027  *  the interface using ifconfig.
1028  *
1029  **********************************************************************/
1030 void
1031 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1032 {
1033 	struct em_softc *sc = ifp->if_softc;
1034 	uint64_t fiber_type = IFM_1000_SX;
1035 	u_int16_t gsr;
1036 
1037 	INIT_DEBUGOUT("em_media_status: begin");
1038 
1039 	em_check_for_link(&sc->hw);
1040 	em_update_link_status(sc);
1041 
1042 	ifmr->ifm_status = IFM_AVALID;
1043 	ifmr->ifm_active = IFM_ETHER;
1044 
1045 	if (!sc->link_active) {
1046 		ifmr->ifm_active |= IFM_NONE;
1047 		return;
1048 	}
1049 
1050 	ifmr->ifm_status |= IFM_ACTIVE;
1051 
1052 	if (sc->hw.media_type == em_media_type_fiber ||
1053 	    sc->hw.media_type == em_media_type_internal_serdes) {
1054 		if (sc->hw.mac_type == em_82545)
1055 			fiber_type = IFM_1000_LX;
1056 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1057 	} else {
1058 		switch (sc->link_speed) {
1059 		case 10:
1060 			ifmr->ifm_active |= IFM_10_T;
1061 			break;
1062 		case 100:
1063 			ifmr->ifm_active |= IFM_100_TX;
1064 			break;
1065 		case 1000:
1066 			ifmr->ifm_active |= IFM_1000_T;
1067 			break;
1068 		}
1069 
1070 		if (sc->link_duplex == FULL_DUPLEX)
1071 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1072 		else
1073 			ifmr->ifm_active |= IFM_HDX;
1074 
1075 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1076 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1077 			if (gsr & SR_1000T_MS_CONFIG_RES)
1078 				ifmr->ifm_active |= IFM_ETH_MASTER;
1079 		}
1080 	}
1081 }
1082 
1083 /*********************************************************************
1084  *
1085  *  Media Ioctl callback
1086  *
1087  *  This routine is called when the user changes speed/duplex using
1088  *  the media/mediaopt options of ifconfig.
1089  *
1090  **********************************************************************/
1091 int
1092 em_media_change(struct ifnet *ifp)
1093 {
1094 	struct em_softc *sc = ifp->if_softc;
1095 	struct ifmedia	*ifm = &sc->media;
1096 
1097 	INIT_DEBUGOUT("em_media_change: begin");
1098 
1099 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1100 		return (EINVAL);
1101 
1102 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1103 	case IFM_AUTO:
1104 		sc->hw.autoneg = DO_AUTO_NEG;
1105 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1106 		break;
1107 	case IFM_1000_LX:
1108 	case IFM_1000_SX:
1109 	case IFM_1000_T:
1110 		sc->hw.autoneg = DO_AUTO_NEG;
1111 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1112 		break;
1113 	case IFM_100_TX:
1114 		sc->hw.autoneg = FALSE;
1115 		sc->hw.autoneg_advertised = 0;
1116 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1117 			sc->hw.forced_speed_duplex = em_100_full;
1118 		else
1119 			sc->hw.forced_speed_duplex = em_100_half;
1120 		break;
1121 	case IFM_10_T:
1122 		sc->hw.autoneg = FALSE;
1123 		sc->hw.autoneg_advertised = 0;
1124 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1125 			sc->hw.forced_speed_duplex = em_10_full;
1126 		else
1127 			sc->hw.forced_speed_duplex = em_10_half;
1128 		break;
1129 	default:
1130 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1131 	}
1132 
1133 	/*
1134 	 * As the speed/duplex settings may have changed we need to
1135 	 * reset the PHY.
1136 	 */
1137 	sc->hw.phy_reset_disable = FALSE;
1138 
1139 	em_init(sc);
1140 
1141 	return (0);
1142 }
1143 
1144 uint64_t
1145 em_flowstatus(struct em_softc *sc)
1146 {
1147 	u_int16_t ar, lpar;
1148 
1149 	if (sc->hw.media_type == em_media_type_fiber ||
1150 	    sc->hw.media_type == em_media_type_internal_serdes)
1151 		return (0);
1152 
1153 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1154 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1155 
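	/*
	 * Resolve flow control the usual 802.3 way from the local (ar)
	 * and link partner (lpar) advertisements: both sides advertising
	 * PAUSE gives symmetric flow control, otherwise the ASM_DIR bits
	 * decide whether only tx or only rx pause is used.
	 */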
1156 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1157 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1158 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1159 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1160 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1161 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1162 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1163 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1164 
1165 	return (0);
1166 }
1167 
1168 /*********************************************************************
1169  *
1170  *  This routine maps the mbufs to tx descriptors.
1171  *
1172  *  return the number of tx descriptors used, 0 on failure
1173  **********************************************************************/
1174 u_int
1175 em_encap(struct em_queue *que, struct mbuf *m)
1176 {
1177 	struct em_softc *sc = que->sc;
1178 	struct em_packet *pkt;
1179 	struct em_tx_desc *desc;
1180 	bus_dmamap_t map;
1181 	u_int32_t txd_upper, txd_lower;
1182 	u_int head, last, used = 0;
1183 	int i, j;
1184 
1185 	/* For 82544 Workaround */
1186 	DESC_ARRAY		desc_array;
1187 	u_int32_t		array_elements;
1188 
1189 	/* get a dmamap for this packet from the next free slot */
1190 	head = que->tx.sc_tx_desc_head;
1191 	pkt = &que->tx.sc_tx_pkts_ring[head];
1192 	map = pkt->pkt_map;
1193 
1194 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1195 	case 0:
1196 		break;
1197 	case EFBIG:
1198 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1199 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1200 		     BUS_DMA_NOWAIT) == 0)
1201 			break;
1202 
1203 		/* FALLTHROUGH */
1204 	default:
1205 		sc->no_tx_dma_setup++;
1206 		return (0);
1207 	}
1208 
1209 	bus_dmamap_sync(sc->sc_dmat, map,
1210 	    0, map->dm_mapsize,
1211 	    BUS_DMASYNC_PREWRITE);
1212 
1213 	if (sc->hw.mac_type == em_82547) {
1214 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1215 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1216 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1217 	}
1218 
1219 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1220 	    sc->hw.mac_type != em_82576 &&
1221 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1222 	    sc->hw.mac_type != em_i350) {
1223 		used += em_transmit_checksum_setup(que, m, head,
1224 		    &txd_upper, &txd_lower);
1225 	} else {
1226 		txd_upper = txd_lower = 0;
1227 	}
1228 
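	/*
	 * em_transmit_checksum_setup() may have queued a context
	 * descriptor, so advance (and wrap) the local head before
	 * filling in the data descriptors below.
	 */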
1229 	head += used;
1230 	if (head >= sc->sc_tx_slots)
1231 		head -= sc->sc_tx_slots;
1232 
1233 	for (i = 0; i < map->dm_nsegs; i++) {
1234 		/* If sc is 82544 and on PCI-X bus */
1235 		if (sc->pcix_82544) {
1236 			/*
1237 			 * Check the Address and Length combination and
1238 			 * split the data accordingly
1239 			 */
1240 			array_elements = em_fill_descriptors(
1241 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1242 			    &desc_array);
1243 			for (j = 0; j < array_elements; j++) {
1244 				desc = &que->tx.sc_tx_desc_ring[head];
1245 
1246 				desc->buffer_addr = htole64(
1247 					desc_array.descriptor[j].address);
1248 				desc->lower.data = htole32(
1249 					(que->tx.sc_txd_cmd | txd_lower |
1250 					 (u_int16_t)desc_array.descriptor[j].length));
1251 				desc->upper.data = htole32(txd_upper);
1252 
1253 				last = head;
1254 				if (++head == sc->sc_tx_slots)
1255 					head = 0;
1256 
1257 				used++;
1258 			}
1259 		} else {
1260 			desc = &que->tx.sc_tx_desc_ring[head];
1261 
1262 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1263 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1264 			    txd_lower | map->dm_segs[i].ds_len);
1265 			desc->upper.data = htole32(txd_upper);
1266 
1267 			last = head;
1268 			if (++head == sc->sc_tx_slots)
1269 				head = 0;
1270 
1271 			used++;
1272 		}
1273 	}
1274 
1275 #if NVLAN > 0
1276 	/* Find out if we are in VLAN mode */
1277 	if (m->m_flags & M_VLANTAG) {
1278 		/* Set the VLAN id */
1279 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1280 
1281 		/* Tell hardware to add tag */
1282 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1283 	}
1284 #endif
1285 
1286 	/* mark the packet with the mbuf and last desc slot */
1287 	pkt->pkt_m = m;
1288 	pkt->pkt_eop = last;
1289 
1290 	que->tx.sc_tx_desc_head = head;
1291 
1292 	/*
1293 	 * Last Descriptor of Packet
1294 	 * needs End Of Packet (EOP)
1295 	 * and Report Status (RS)
1296 	 */
1297 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1298 
1299 	if (sc->hw.mac_type == em_82547) {
1300 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1301 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1302 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1303 	}
1304 
1305 	return (used);
1306 }
1307 
1308 /*********************************************************************
1309  *
1310  * 82547 workaround to avoid a controller hang in a half-duplex environment.
1311  * The workaround is to avoid queuing a large packet that would span
1312  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1313  * in this case, and we do that only when the FIFO is quiescent.
1314  *
1315  **********************************************************************/
1316 void
1317 em_82547_move_tail_locked(struct em_softc *sc)
1318 {
1319 	uint16_t hw_tdt;
1320 	uint16_t sw_tdt;
1321 	struct em_tx_desc *tx_desc;
1322 	uint16_t length = 0;
1323 	boolean_t eop = 0;
1324 	struct em_queue *que = sc->queues; /* single queue chip */
1325 
1326 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1327 	sw_tdt = que->tx.sc_tx_desc_head;
1328 
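	/*
	 * Walk the descriptors the hardware has not been told about yet
	 * (from the hardware TDT up to the software head), summing each
	 * packet's length.  At every EOP the tail is only advanced if
	 * the packet fits in the on-chip FIFO (or the FIFO could be
	 * reset); otherwise the timer is rearmed and we retry later.
	 */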
1329 	while (hw_tdt != sw_tdt) {
1330 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1331 		length += tx_desc->lower.flags.length;
1332 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1333 		if (++hw_tdt == sc->sc_tx_slots)
1334 			hw_tdt = 0;
1335 
1336 		if (eop) {
1337 			if (em_82547_fifo_workaround(sc, length)) {
1338 				sc->tx_fifo_wrk_cnt++;
1339 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1340 				break;
1341 			}
1342 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1343 			em_82547_update_fifo_head(sc, length);
1344 			length = 0;
1345 		}
1346 	}
1347 }
1348 
1349 void
1350 em_82547_move_tail(void *arg)
1351 {
1352 	struct em_softc *sc = arg;
1353 	int s;
1354 
1355 	s = splnet();
1356 	em_82547_move_tail_locked(sc);
1357 	splx(s);
1358 }
1359 
1360 int
1361 em_82547_fifo_workaround(struct em_softc *sc, int len)
1362 {
1363 	int fifo_space, fifo_pkt_len;
1364 
1365 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1366 
1367 	if (sc->link_duplex == HALF_DUPLEX) {
1368 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1369 
1370 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1371 			if (em_82547_tx_fifo_reset(sc))
1372 				return (0);
1373 			else
1374 				return (1);
1375 		}
1376 	}
1377 
1378 	return (0);
1379 }
1380 
1381 void
1382 em_82547_update_fifo_head(struct em_softc *sc, int len)
1383 {
1384 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1385 
1386 	/* tx_fifo_head is always 16 byte aligned */
1387 	sc->tx_fifo_head += fifo_pkt_len;
1388 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1389 		sc->tx_fifo_head -= sc->tx_fifo_size;
1390 }
1391 
1392 int
1393 em_82547_tx_fifo_reset(struct em_softc *sc)
1394 {
1395 	uint32_t tctl;
1396 	struct em_queue *que = sc->queues; /* single queue chip */
1397 
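	/*
	 * Only reset the FIFO when it is quiescent: descriptor ring
	 * empty (TDT == TDH), FIFO head equal to tail, the saved
	 * head/tail state in sync, and no packets pending in the
	 * FIFO (TDFPC == 0).
	 */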
1398 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1399 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1400 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1401 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1402 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1403 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1404 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1405 
1406 		/* Disable TX unit */
1407 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1408 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1409 
1410 		/* Reset FIFO pointers */
1411 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1412 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1413 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1414 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1415 
1416 		/* Re-enable TX unit */
1417 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1418 		E1000_WRITE_FLUSH(&sc->hw);
1419 
1420 		sc->tx_fifo_head = 0;
1421 		sc->tx_fifo_reset_cnt++;
1422 
1423 		return (TRUE);
1424 	} else
1425 		return (FALSE);
1426 }
1427 
1428 void
1429 em_iff(struct em_softc *sc)
1430 {
1431 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1432 	struct arpcom *ac = &sc->sc_ac;
1433 	u_int32_t reg_rctl = 0;
1434 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1435 	struct ether_multi *enm;
1436 	struct ether_multistep step;
1437 	int i = 0;
1438 
1439 	IOCTL_DEBUGOUT("em_iff: begin");
1440 
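	/*
	 * The 82542 rev 2.0 needs the receiver held in reset while the
	 * filters are rewritten: raise RCTL_RST (and temporarily turn
	 * off MWI) here, and restore both once the update is done below.
	 */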
1441 	if (sc->hw.mac_type == em_82542_rev2_0) {
1442 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1443 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1444 			em_pci_clear_mwi(&sc->hw);
1445 		reg_rctl |= E1000_RCTL_RST;
1446 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1447 		msec_delay(5);
1448 	}
1449 
1450 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1451 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1452 	ifp->if_flags &= ~IFF_ALLMULTI;
1453 
1454 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1455 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1456 		ifp->if_flags |= IFF_ALLMULTI;
1457 		reg_rctl |= E1000_RCTL_MPE;
1458 		if (ifp->if_flags & IFF_PROMISC)
1459 			reg_rctl |= E1000_RCTL_UPE;
1460 	} else {
1461 		ETHER_FIRST_MULTI(step, ac, enm);
1462 		while (enm != NULL) {
1463 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1464 			i += ETH_LENGTH_OF_ADDRESS;
1465 
1466 			ETHER_NEXT_MULTI(step, enm);
1467 		}
1468 
1469 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1470 	}
1471 
1472 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1473 
1474 	if (sc->hw.mac_type == em_82542_rev2_0) {
1475 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1476 		reg_rctl &= ~E1000_RCTL_RST;
1477 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1478 		msec_delay(5);
1479 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1480 			em_pci_set_mwi(&sc->hw);
1481 	}
1482 }
1483 
1484 /*********************************************************************
1485  *  Timer routine
1486  *
1487  *  This routine applies the smartspeed workaround and updates statistics.
1488  *
1489  **********************************************************************/
1490 
1491 void
1492 em_local_timer(void *arg)
1493 {
1494 	struct em_softc *sc = arg;
1495 	int s;
1496 
1497 	timeout_add_sec(&sc->timer_handle, 1);
1498 
1499 	s = splnet();
1500 	em_smartspeed(sc);
1501 	splx(s);
1502 
1503 #if NKSTAT > 0
1504 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1505 		em_kstat_read(sc->kstat);
1506 		mtx_leave(&sc->kstat_mtx);
1507 	}
1508 #endif
1509 }
1510 
1511 void
1512 em_update_link_status(struct em_softc *sc)
1513 {
1514 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1515 	u_char link_state;
1516 
1517 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1518 		if (sc->link_active == 0) {
1519 			em_get_speed_and_duplex(&sc->hw,
1520 						&sc->link_speed,
1521 						&sc->link_duplex);
1522 			/* Check if we may set SPEED_MODE bit on PCI-E */
1523 			if ((sc->link_speed == SPEED_1000) &&
1524 			    ((sc->hw.mac_type == em_82571) ||
1525 			    (sc->hw.mac_type == em_82572) ||
1526 			    (sc->hw.mac_type == em_82575) ||
1527 			    (sc->hw.mac_type == em_82576) ||
1528 			    (sc->hw.mac_type == em_82580))) {
1529 				int tarc0;
1530 
1531 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1532 				tarc0 |= SPEED_MODE_BIT;
1533 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1534 			}
1535 			sc->link_active = 1;
1536 			sc->smartspeed = 0;
1537 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1538 		}
1539 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1540 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1541 	} else {
1542 		if (sc->link_active == 1) {
1543 			ifp->if_baudrate = sc->link_speed = 0;
1544 			sc->link_duplex = 0;
1545 			sc->link_active = 0;
1546 		}
1547 		link_state = LINK_STATE_DOWN;
1548 	}
1549 	if (ifp->if_link_state != link_state) {
1550 		ifp->if_link_state = link_state;
1551 		if_link_state_change(ifp);
1552 	}
1553 }
1554 
1555 /*********************************************************************
1556  *
1557  *  This routine disables all traffic on the adapter by issuing a
1558  *  global reset on the MAC, and deallocates the TX/RX buffers.
1559  *
1560  **********************************************************************/
1561 
1562 void
1563 em_stop(void *arg, int softonly)
1564 {
1565 	struct em_softc *sc = arg;
1566 	struct em_queue *que = sc->queues; /* Use only first queue. */
1567 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1568 
1569 	/* Tell the stack that the interface is no longer active */
1570 	ifp->if_flags &= ~IFF_RUNNING;
1571 
1572 	INIT_DEBUGOUT("em_stop: begin");
1573 
1574 	timeout_del(&que->rx_refill);
1575 	timeout_del(&sc->timer_handle);
1576 	timeout_del(&sc->tx_fifo_timer_handle);
1577 
1578 	if (!softonly)
1579 		em_disable_intr(sc);
1580 	if (sc->hw.mac_type >= em_pch_spt)
1581 		em_flush_desc_rings(sc);
1582 	if (!softonly)
1583 		em_reset_hw(&sc->hw);
1584 
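	/*
	 * Wait for any interrupt handler and any transmit context still
	 * in flight to finish before tearing down the transmit and
	 * receive structures below.
	 */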
1585 	intr_barrier(sc->sc_intrhand);
1586 	ifq_barrier(&ifp->if_snd);
1587 
1588 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1589 
1590 	ifq_clr_oactive(&ifp->if_snd);
1591 	ifp->if_timer = 0;
1592 
1593 	em_free_transmit_structures(sc);
1594 	em_free_receive_structures(sc);
1595 }
1596 
1597 /*********************************************************************
1598  *
1599  *  Determine hardware type and revision.
1600  *
1601  **********************************************************************/
1602 void
1603 em_identify_hardware(struct em_softc *sc)
1604 {
1605 	u_int32_t reg;
1606 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1607 
1608 	/* Make sure our PCI config space has the necessary stuff set */
1609 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1610 					    PCI_COMMAND_STATUS_REG);
1611 
1612 	/* Save off the information about this board */
1613 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1614 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1615 
1616 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1617 	sc->hw.revision_id = PCI_REVISION(reg);
1618 
1619 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1620 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1621 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1622 
1623 	/* Identify the MAC */
1624 	if (em_set_mac_type(&sc->hw))
1625 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1626 
1627 	if (sc->hw.mac_type == em_pchlan)
1628 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1629 
1630 	if (sc->hw.mac_type == em_82541 ||
1631 	    sc->hw.mac_type == em_82541_rev_2 ||
1632 	    sc->hw.mac_type == em_82547 ||
1633 	    sc->hw.mac_type == em_82547_rev_2)
1634 		sc->hw.phy_init_script = TRUE;
1635 }
1636 
1637 void
1638 em_legacy_irq_quirk_spt(struct em_softc *sc)
1639 {
1640 	uint32_t	reg;
1641 
1642 	/* Legacy interrupt: SPT needs a quirk. */
1643 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp)
1644 		return;
1645 	if (sc->legacy_irq == 0)
1646 		return;
1647 
1648 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1649 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1650 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1651 
1652 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1653 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1654 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1655 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1656 }
1657 
1658 int
1659 em_allocate_pci_resources(struct em_softc *sc)
1660 {
1661 	int		val, rid;
1662 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1663 	struct em_queue	       *que = NULL;
1664 
1665 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1666 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1667 		printf(": mmba is not mem space\n");
1668 		return (ENXIO);
1669 	}
1670 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1671 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1672 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1673 		printf(": cannot find mem space\n");
1674 		return (ENXIO);
1675 	}
1676 
1677 	switch (sc->hw.mac_type) {
1678 	case em_82544:
1679 	case em_82540:
1680 	case em_82545:
1681 	case em_82546:
1682 	case em_82541:
1683 	case em_82541_rev_2:
1684 		/* Figure out where our I/O BAR is. */
1685 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1686 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1687 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1688 				sc->io_rid = rid;
1689 				break;
1690 			}
1691 			rid += 4;
1692 			if (PCI_MAPREG_MEM_TYPE(val) ==
1693 			    PCI_MAPREG_MEM_TYPE_64BIT)
1694 				rid += 4;	/* skip high bits, too */
1695 		}
1696 
1697 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1698 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1699 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1700 			printf(": cannot find i/o space\n");
1701 			return (ENXIO);
1702 		}
1703 
1704 		sc->hw.io_base = 0;
1705 		break;
1706 	default:
1707 		break;
1708 	}
1709 
1710 	sc->osdep.em_flashoffset = 0;
1711 	/* For ICH8 and family we need to find the flash memory. */
1712 	if (sc->hw.mac_type >= em_pch_spt) {
1713 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1714 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1715 		sc->osdep.em_flashbase = 0;
1716 		sc->osdep.em_flashsize = 0;
1717 		sc->osdep.em_flashoffset = 0xe000;
1718 	} else if (IS_ICH8(sc->hw.mac_type)) {
1719 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1720 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1721 			printf(": flash is not mem space\n");
1722 			return (ENXIO);
1723 		}
1724 
1725 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1726 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1727 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1728 			printf(": cannot find mem space\n");
1729 			return (ENXIO);
1730 		}
1731 	}
1732 
1733 	sc->osdep.dev = (struct device *)sc;
1734 	sc->hw.back = &sc->osdep;
1735 
1736 	/* Only one queue for the moment. */
1737 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1738 	if (que == NULL) {
1739 		printf(": unable to allocate queue memory\n");
1740 		return (ENOMEM);
1741 	}
1742 	que->me = 0;
1743 	que->sc = sc;
1744 	timeout_set(&que->rx_refill, em_rxrefill, que);
1745 
1746 	sc->queues = que;
1747 	sc->num_queues = 1;
1748 	sc->msix = 0;
1749 	sc->legacy_irq = 0;
1750 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1751 		return (ENXIO);
1752 
1753 	/*
1754 	 * The ICP_xxxx device has multiple, duplicate register sets for
1755 	 * use when it is being used as a network processor. Disable those
1756 	 * registers here, as they are not necessary in this context and
1757 	 * can confuse the system.
1758 	 */
1759 	if (sc->hw.mac_type == em_icp_xxxx) {
1760 		int offset;
1761 		pcireg_t val;
1762 
1763 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1764 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1765 			return (0);
1766 		}
1767 		offset += PCI_ST_SMIA_OFFSET;
1768 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1769 		    offset, 0x06);
1770 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1771 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1772 	}
1773 	return (0);
1774 }
1775 
1776 void
1777 em_free_pci_resources(struct em_softc *sc)
1778 {
1779 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1780 	pci_chipset_tag_t	pc = pa->pa_pc;
1781 	struct em_queue	       *que = NULL;
1782 	if (sc->sc_intrhand)
1783 		pci_intr_disestablish(pc, sc->sc_intrhand);
1784 	sc->sc_intrhand = 0;
1785 
1786 	if (sc->osdep.em_flashbase)
1787 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1788 				sc->osdep.em_flashsize);
1789 	sc->osdep.em_flashbase = 0;
1790 
1791 	if (sc->osdep.em_iobase)
1792 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1793 				sc->osdep.em_iosize);
1794 	sc->osdep.em_iobase = 0;
1795 
1796 	if (sc->osdep.em_membase)
1797 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1798 				sc->osdep.em_memsize);
1799 	sc->osdep.em_membase = 0;
1800 
1801 	FOREACH_QUEUE(sc, que) {
1802 		if (que->rx.sc_rx_desc_ring != NULL) {
1803 			que->rx.sc_rx_desc_ring = NULL;
1804 			em_dma_free(sc, &que->rx.sc_rx_dma);
1805 		}
1806 		if (que->tx.sc_tx_desc_ring != NULL) {
1807 			que->tx.sc_tx_desc_ring = NULL;
1808 			em_dma_free(sc, &que->tx.sc_tx_dma);
1809 		}
1810 		if (que->tag)
1811 			pci_intr_disestablish(pc, que->tag);
1812 		que->tag = NULL;
1813 		que->eims = 0;
1814 		que->me = 0;
1815 		que->sc = NULL;
1816 	}
1817 	sc->legacy_irq = 0;
1818 	sc->msix_linkvec = 0;
1819 	sc->msix_queuesmask = 0;
1820 	if (sc->queues)
1821 		free(sc->queues, M_DEVBUF,
1822 		    sc->num_queues * sizeof(struct em_queue));
1823 	sc->num_queues = 0;
1824 	sc->queues = NULL;
1825 }
1826 
1827 /*********************************************************************
1828  *
1829  *  Initialize the hardware to a configuration as specified by the
1830  *  em_softc structure. The controller is reset, the EEPROM is
1831  *  verified, the MAC address is set, then the shared initialization
1832  *  routines are called.
1833  *
1834  **********************************************************************/
1835 int
1836 em_hardware_init(struct em_softc *sc)
1837 {
1838 	uint32_t ret_val;
1839 	u_int16_t rx_buffer_size;
1840 
1841 	INIT_DEBUGOUT("em_hardware_init: begin");
1842 	if (sc->hw.mac_type >= em_pch_spt)
1843 		em_flush_desc_rings(sc);
1844 	/* Issue a global reset */
1845 	em_reset_hw(&sc->hw);
1846 
1847 	/* When hardware is reset, fifo_head is also reset */
1848 	sc->tx_fifo_head = 0;
1849 
1850 	/* Make sure we have a good EEPROM before we read from it */
1851 	if (em_get_flash_presence_i210(&sc->hw) &&
1852 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1853 		/*
1854 		 * Some PCIe parts fail the first check due to
1855 		 * the link being in a sleep state.  Call it again;
1856 		 * if it fails a second time it's a real issue.
1857 		 */
1858 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1859 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1860 			       DEVNAME(sc));
1861 			return (EIO);
1862 		}
1863 	}
1864 
1865 	if (em_get_flash_presence_i210(&sc->hw) &&
1866 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1867 		printf("%s: EEPROM read error while reading part number\n",
1868 		       DEVNAME(sc));
1869 		return (EIO);
1870 	}
1871 
1872 	/* Set up smart power down as default off on newer adapters */
1873 	if (!em_smart_pwr_down &&
1874 	     (sc->hw.mac_type == em_82571 ||
1875 	      sc->hw.mac_type == em_82572 ||
1876 	      sc->hw.mac_type == em_82575 ||
1877 	      sc->hw.mac_type == em_82576 ||
1878 	      sc->hw.mac_type == em_82580 ||
1879 	      sc->hw.mac_type == em_i210 ||
1880 	      sc->hw.mac_type == em_i350 )) {
1881 		uint16_t phy_tmp = 0;
1882 
1883 		/* Speed up time to link by disabling smart power down */
1884 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1885 		phy_tmp &= ~IGP02E1000_PM_SPD;
1886 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1887 	}
1888 
1889 	em_legacy_irq_quirk_spt(sc);
1890 
1891 	/*
1892 	 * These parameters control the automatic generation (Tx) and
1893 	 * response (Rx) to Ethernet PAUSE frames.
1894 	 * - High water mark should allow for at least two frames to be
1895 	 *   received after sending an XOFF.
1896 	 * - Low water mark works best when it is very near the high water mark.
1897 	 *   This allows the receiver to restart by sending XON when it has
1898 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1899 	 *   restart after one full frame is pulled from the buffer.  There
1900 	 *   could be several smaller frames in the buffer and if so they will
1901 	 *   not trigger the XON until their total number reduces the buffer
1902 	 *   by 1500.
1903 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1904 	 */
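	/*
	 * Worked example (illustrative numbers only, not taken from any
	 * particular adapter): if PBA reports 0x30 (48KB of packet buffer)
	 * and max_frame_size is 1518 bytes, the assignments below give
	 *     rx_buffer_size = 0x30 << 10                = 49152
	 *     fc_high_water  = 49152 - roundup(1518, 1K) = 49152 - 2048 = 47104
	 *     fc_low_water   = 47104 - 1500              = 45604
	 * so XOFF is generated once less than roughly 2KB of buffer remains
	 * free, and XON once about another 1500 bytes have drained.
	 */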
1905 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1906 
1907 	sc->hw.fc_high_water = rx_buffer_size -
1908 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1909 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1910 	if (sc->hw.mac_type == em_80003es2lan)
1911 		sc->hw.fc_pause_time = 0xFFFF;
1912 	else
1913 		sc->hw.fc_pause_time = 1000;
1914 	sc->hw.fc_send_xon = TRUE;
1915 	sc->hw.fc = E1000_FC_FULL;
1916 
1917 	em_disable_aspm(sc);
1918 
1919 	if ((ret_val = em_init_hw(sc)) != 0) {
1920 		if (ret_val == E1000_DEFER_INIT) {
1921 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1922 			return (EAGAIN);
1923 		}
1924 		printf("\n%s: Hardware Initialization Failed: %d\n",
1925 		       DEVNAME(sc), ret_val);
1926 		return (EIO);
1927 	}
1928 
1929 	em_check_for_link(&sc->hw);
1930 
1931 	return (0);
1932 }
1933 
1934 /*********************************************************************
1935  *
1936  *  Setup networking device structure and register an interface.
1937  *
1938  **********************************************************************/
1939 void
1940 em_setup_interface(struct em_softc *sc)
1941 {
1942 	struct ifnet   *ifp;
1943 	uint64_t fiber_type = IFM_1000_SX;
1944 
1945 	INIT_DEBUGOUT("em_setup_interface: begin");
1946 
1947 	ifp = &sc->sc_ac.ac_if;
1948 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1949 	ifp->if_softc = sc;
1950 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1951 	ifp->if_xflags = IFXF_MPSAFE;
1952 	ifp->if_ioctl = em_ioctl;
1953 	ifp->if_qstart = em_start;
1954 	ifp->if_watchdog = em_watchdog;
1955 	ifp->if_hardmtu =
1956 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1957 	ifq_set_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1958 
1959 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1960 
1961 #if NVLAN > 0
1962 	if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
1963 	    sc->hw.mac_type != em_82576 &&
1964 	    sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
1965 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1966 #endif
1967 
1968 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1969 	    sc->hw.mac_type != em_82576 &&
1970 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1971 	    sc->hw.mac_type != em_i350)
1972 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1973 
1974 	/*
1975 	 * Specify the media types supported by this adapter and register
1976 	 * callbacks to update media and link information
1977 	 */
1978 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1979 		     em_media_status);
1980 	if (sc->hw.media_type == em_media_type_fiber ||
1981 	    sc->hw.media_type == em_media_type_internal_serdes) {
1982 		if (sc->hw.mac_type == em_82545)
1983 			fiber_type = IFM_1000_LX;
1984 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1985 			    0, NULL);
1986 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1987 			    0, NULL);
1988 	} else {
1989 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1990 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1991 			    0, NULL);
1992 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
1993 			    0, NULL);
1994 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1995 			    0, NULL);
1996 		if (sc->hw.phy_type != em_phy_ife) {
1997 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1998 				    0, NULL);
1999 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2000 		}
2001 	}
2002 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2003 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
2004 
2005 	if_attach(ifp);
2006 	ether_ifattach(ifp);
2007 	em_enable_intr(sc);
2008 }
2009 
2010 int
2011 em_detach(struct device *self, int flags)
2012 {
2013 	struct em_softc *sc = (struct em_softc *)self;
2014 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2015 	struct pci_attach_args *pa = &sc->osdep.em_pa;
2016 	pci_chipset_tag_t	pc = pa->pa_pc;
2017 
2018 	if (sc->sc_intrhand)
2019 		pci_intr_disestablish(pc, sc->sc_intrhand);
2020 	sc->sc_intrhand = 0;
2021 
2022 	em_stop(sc, 1);
2023 
2024 	em_free_pci_resources(sc);
2025 
2026 	ether_ifdetach(ifp);
2027 	if_detach(ifp);
2028 
2029 	return (0);
2030 }
2031 
2032 int
2033 em_activate(struct device *self, int act)
2034 {
2035 	struct em_softc *sc = (struct em_softc *)self;
2036 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2037 	int rv = 0;
2038 
2039 	switch (act) {
2040 	case DVACT_SUSPEND:
2041 		if (ifp->if_flags & IFF_RUNNING)
2042 			em_stop(sc, 0);
2043 		/* We have no children at the moment, but we will soon. */
2044 		rv = config_activate_children(self, act);
2045 		break;
2046 	case DVACT_RESUME:
2047 		if (ifp->if_flags & IFF_UP)
2048 			em_init(sc);
2049 		break;
2050 	default:
2051 		rv = config_activate_children(self, act);
2052 		break;
2053 	}
2054 	return (rv);
2055 }
2056 
2057 /*********************************************************************
2058  *
2059  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2060  *
2061  **********************************************************************/
2062 void
2063 em_smartspeed(struct em_softc *sc)
2064 {
2065 	uint16_t phy_tmp;
2066 
2067 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2068 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2069 		return;
2070 
2071 	if (sc->smartspeed == 0) {
2072 		/* If the Master/Slave config fault is asserted twice,
2073 		 * we assume the faults are back-to-back. */
2074 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2075 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2076 			return;
2077 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2078 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2079 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2080 					&phy_tmp);
2081 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2082 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2083 				em_write_phy_reg(&sc->hw,
2084 						    PHY_1000T_CTRL, phy_tmp);
2085 				sc->smartspeed++;
2086 				if (sc->hw.autoneg &&
2087 				    !em_phy_setup_autoneg(&sc->hw) &&
2088 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2089 						       &phy_tmp)) {
2090 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2091 						    MII_CR_RESTART_AUTO_NEG);
2092 					em_write_phy_reg(&sc->hw,
2093 							 PHY_CTRL, phy_tmp);
2094 				}
2095 			}
2096 		}
2097 		return;
2098 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2099 		/* If still no link, perhaps using 2/3 pair cable */
2100 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2101 		phy_tmp |= CR_1000T_MS_ENABLE;
2102 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2103 		if (sc->hw.autoneg &&
2104 		    !em_phy_setup_autoneg(&sc->hw) &&
2105 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2106 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2107 				    MII_CR_RESTART_AUTO_NEG);
2108 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2109 		}
2110 	}
2111 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2112 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2113 		sc->smartspeed = 0;
2114 }
2115 
2116 /*
2117  * Manage DMA'able memory.
2118  */
2119 int
2120 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2121 {
2122 	int r;
2123 
2124 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2125 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2126 	if (r != 0)
2127 		return (r);
2128 
2129 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2130 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2131 	if (r != 0)
2132 		goto destroy;
2133 
2134 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2135 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2136 	if (r != 0)
2137 		goto free;
2138 
2139 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2140 	    NULL, BUS_DMA_WAITOK);
2141 	if (r != 0)
2142 		goto unmap;
2143 
2144 	dma->dma_size = size;
2145 	return (0);
2146 
2147 unmap:
2148 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2149 free:
2150 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2151 destroy:
2152 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2153 
2154 	return (r);
2155 }
2156 
2157 void
2158 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2159 {
2160 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2161 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2162 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2163 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2164 }
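/*
 * Typical usage sketch (sizes are illustrative): a descriptor ring is
 * obtained with em_dma_malloc(sc, slots * sizeof(struct em_tx_desc), &dma);
 * its CPU mapping is then dma.dma_vaddr, its device address is
 * dma.dma_map->dm_segs[0].ds_addr, and the whole allocation is undone with
 * em_dma_free(sc, &dma) when the ring is torn down.
 */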
2165 
2166 /*********************************************************************
2167  *
2168  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2169  *  the information needed to transmit a packet on the wire.
2170  *
2171  **********************************************************************/
2172 int
2173 em_allocate_transmit_structures(struct em_softc *sc)
2174 {
2175 	struct em_queue *que;
2176 
2177 	FOREACH_QUEUE(sc, que) {
2178 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2179 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2180 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2181 
2182 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2183 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2184 		if (que->tx.sc_tx_pkts_ring == NULL) {
2185 			printf("%s: Unable to allocate tx_buffer memory\n",
2186 			    DEVNAME(sc));
2187 			return (ENOMEM);
2188 		}
2189 	}
2190 
2191 	return (0);
2192 }
2193 
2194 /*********************************************************************
2195  *
2196  *  Allocate and initialize transmit structures.
2197  *
2198  **********************************************************************/
2199 int
2200 em_setup_transmit_structures(struct em_softc *sc)
2201 {
2202 	struct em_queue *que;
2203 	struct em_packet *pkt;
2204 	int error, i;
2205 
2206 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2207 		goto fail;
2208 
2209 	FOREACH_QUEUE(sc, que) {
2210 		bzero((void *) que->tx.sc_tx_desc_ring,
2211 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2212 
2213 		for (i = 0; i < sc->sc_tx_slots; i++) {
2214 			pkt = &que->tx.sc_tx_pkts_ring[i];
2215 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2216 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2217 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2218 			if (error != 0) {
2219 				printf("%s: Unable to create TX DMA map\n",
2220 				    DEVNAME(sc));
2221 				goto fail;
2222 			}
2223 		}
2224 
2225 		que->tx.sc_tx_desc_head = 0;
2226 		que->tx.sc_tx_desc_tail = 0;
2227 
2228 		/* Set checksum context */
2229 		que->tx.active_checksum_context = OFFLOAD_NONE;
2230 	}
2231 
2232 	return (0);
2233 
2234 fail:
2235 	em_free_transmit_structures(sc);
2236 	return (error);
2237 }
2238 
2239 /*********************************************************************
2240  *
2241  *  Enable transmit unit.
2242  *
2243  **********************************************************************/
2244 void
2245 em_initialize_transmit_unit(struct em_softc *sc)
2246 {
2247 	u_int32_t	reg_tctl, reg_tipg = 0;
2248 	u_int64_t	bus_addr;
2249 	struct em_queue *que;
2250 
2251 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2252 
2253 	FOREACH_QUEUE(sc, que) {
2254 		/* Setup the Base and Length of the Tx Descriptor Ring */
2255 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2256 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2257 		    sc->sc_tx_slots *
2258 		    sizeof(struct em_tx_desc));
2259 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2260 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2261 
2262 		/* Setup the HW Tx Head and Tail descriptor pointers */
2263 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2264 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2265 
2266 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2267 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2268 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2269 
2270 		/* Set the default values for the Tx Inter Packet Gap timer */
2271 		switch (sc->hw.mac_type) {
2272 		case em_82542_rev2_0:
2273 		case em_82542_rev2_1:
2274 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2275 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2276 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2277 			break;
2278 		case em_80003es2lan:
2279 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2280 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2281 			break;
2282 		default:
2283 			if (sc->hw.media_type == em_media_type_fiber ||
2284 			    sc->hw.media_type == em_media_type_internal_serdes)
2285 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2286 			else
2287 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2288 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2289 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2290 		}
2291 
2292 
2293 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2294 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2295 		if (sc->hw.mac_type >= em_82540)
2296 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2297 
2298 		/* Setup Transmit Descriptor Base Settings */
2299 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2300 
2301 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2302 		    sc->hw.mac_type == em_82576 ||
2303 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2304 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2305 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2306 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2307 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2308 		} else if (sc->tx_int_delay > 0)
2309 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2310 	}
2311 
2312 	/* Program the Transmit Control Register */
2313 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2314 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2315 	if (sc->hw.mac_type >= em_82571)
2316 		reg_tctl |= E1000_TCTL_MULR;
2317 	if (sc->link_duplex == FULL_DUPLEX)
2318 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2319 	else
2320 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2321 	/* This write will effectively turn on the transmit unit */
2322 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2323 
2324 	/* SPT Si errata workaround to avoid data corruption */
2325 
2326 	if (sc->hw.mac_type == em_pch_spt) {
2327 		uint32_t	reg_val;
2328 
2329 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2330 		reg_val |= E1000_RCTL_RDMTS_HEX;
2331 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2332 
2333 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2334 		/* i218-i219 Specification Update 1.5.4.5 */
2335 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2336 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2337 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2338 	}
2339 }
2340 
2341 /*********************************************************************
2342  *
2343  *  Free all transmit related data structures.
2344  *
2345  **********************************************************************/
2346 void
2347 em_free_transmit_structures(struct em_softc *sc)
2348 {
2349 	struct em_queue *que;
2350 	struct em_packet *pkt;
2351 	int i;
2352 
2353 	INIT_DEBUGOUT("free_transmit_structures: begin");
2354 
2355 	FOREACH_QUEUE(sc, que) {
2356 		if (que->tx.sc_tx_pkts_ring != NULL) {
2357 			for (i = 0; i < sc->sc_tx_slots; i++) {
2358 				pkt = &que->tx.sc_tx_pkts_ring[i];
2359 
2360 				if (pkt->pkt_m != NULL) {
2361 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2362 					    0, pkt->pkt_map->dm_mapsize,
2363 					    BUS_DMASYNC_POSTWRITE);
2364 					bus_dmamap_unload(sc->sc_dmat,
2365 					    pkt->pkt_map);
2366 
2367 					m_freem(pkt->pkt_m);
2368 					pkt->pkt_m = NULL;
2369 				}
2370 
2371 				if (pkt->pkt_map != NULL) {
2372 					bus_dmamap_destroy(sc->sc_dmat,
2373 					    pkt->pkt_map);
2374 					pkt->pkt_map = NULL;
2375 				}
2376 			}
2377 
2378 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2379 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2380 			que->tx.sc_tx_pkts_ring = NULL;
2381 		}
2382 
2383 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2384 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2385 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2386 	}
2387 }
2388 
2389 /*********************************************************************
2390  *
2391  *  The offload context needs to be set when we transfer the first
2392  *  packet of a particular protocol (TCP/UDP). We change the
2393  *  context only if the protocol type changes.
2394  *
2395  **********************************************************************/
2396 u_int
2397 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2398     u_int32_t *txd_upper, u_int32_t *txd_lower)
2399 {
2400 	struct em_context_desc *TXD;
2401 
2402 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2403 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2404 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2405 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2406 			return (0);
2407 		else
2408 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2409 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2410 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2411 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2412 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2413 			return (0);
2414 		else
2415 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2416 	} else {
2417 		*txd_upper = 0;
2418 		*txd_lower = 0;
2419 		return (0);
2420 	}
2421 
2422 	/* If we reach this point, the checksum offload context
2423 	 * needs to be reset.
2424 	 */
2425 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2426 
2427 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2428 	TXD->lower_setup.ip_fields.ipcso =
2429 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2430 	TXD->lower_setup.ip_fields.ipcse =
2431 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2432 
2433 	TXD->upper_setup.tcp_fields.tucss =
2434 	    ETHER_HDR_LEN + sizeof(struct ip);
2435 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2436 
2437 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2438 		TXD->upper_setup.tcp_fields.tucso =
2439 		    ETHER_HDR_LEN + sizeof(struct ip) +
2440 		    offsetof(struct tcphdr, th_sum);
2441 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2442 		TXD->upper_setup.tcp_fields.tucso =
2443 		    ETHER_HDR_LEN + sizeof(struct ip) +
2444 		    offsetof(struct udphdr, uh_sum);
2445 	}
2446 
2447 	TXD->tcp_seg_setup.data = htole32(0);
2448 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2449 
2450 	return (1);
2451 }
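/*
 * For reference, with a plain 14-byte Ethernet header and a 20-byte IPv4
 * header (no options), the offsets programmed above work out to
 *     ipcss = 14, ipcso = 24, ipcse = 33, tucss = 34,
 *     tucso = 50 for TCP (th_sum) or 40 for UDP (uh_sum),
 * i.e. the byte positions of the IP and TCP/UDP checksum fields that the
 * hardware is asked to fill in.
 */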
2452 
2453 /**********************************************************************
2454  *
2455  *  Examine each tx_buffer in the used queue. If the hardware is done
2456  *  processing the packet then free associated resources. The
2457  *  tx_buffer is put back on the free queue.
2458  *
2459  **********************************************************************/
2460 void
2461 em_txeof(struct em_queue *que)
2462 {
2463 	struct em_softc *sc = que->sc;
2464 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2465 	struct em_packet *pkt;
2466 	struct em_tx_desc *desc;
2467 	u_int head, tail;
2468 	u_int free = 0;
2469 
2470 	head = que->tx.sc_tx_desc_head;
2471 	tail = que->tx.sc_tx_desc_tail;
2472 
2473 	if (head == tail)
2474 		return;
2475 
2476 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2477 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2478 	    BUS_DMASYNC_POSTREAD);
2479 
2480 	do {
2481 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2482 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2483 
2484 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2485 			break;
2486 
2487 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2488 		    0, pkt->pkt_map->dm_mapsize,
2489 		    BUS_DMASYNC_POSTWRITE);
2490 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2491 
2492 		KASSERT(pkt->pkt_m != NULL);
2493 
2494 		m_freem(pkt->pkt_m);
2495 		pkt->pkt_m = NULL;
2496 
2497 		tail = pkt->pkt_eop;
2498 
2499 		if (++tail == sc->sc_tx_slots)
2500 			tail = 0;
2501 
2502 		free++;
2503 	} while (tail != head);
2504 
2505 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2506 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2507 	    BUS_DMASYNC_PREREAD);
2508 
2509 	if (free == 0)
2510 		return;
2511 
2512 	que->tx.sc_tx_desc_tail = tail;
2513 
2514 	if (ifq_is_oactive(&ifp->if_snd))
2515 		ifq_restart(&ifp->if_snd);
2516 	else if (tail == head)
2517 		ifp->if_timer = 0;
2518 }
2519 
2520 /*********************************************************************
2521  *
2522  *  Get a buffer from system mbuf buffer pool.
2523  *
2524  **********************************************************************/
2525 int
2526 em_get_buf(struct em_queue *que, int i)
2527 {
2528 	struct em_softc *sc = que->sc;
2529 	struct mbuf    *m;
2530 	struct em_packet *pkt;
2531 	struct em_rx_desc *desc;
2532 	int error;
2533 
2534 	pkt = &que->rx.sc_rx_pkts_ring[i];
2535 	desc = &que->rx.sc_rx_desc_ring[i];
2536 
2537 	KASSERT(pkt->pkt_m == NULL);
2538 
2539 	m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2540 	if (m == NULL) {
2541 		sc->mbuf_cluster_failed++;
2542 		return (ENOBUFS);
2543 	}
2544 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2545 	m_adj(m, ETHER_ALIGN);
2546 
2547 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2548 	    m, BUS_DMA_NOWAIT);
2549 	if (error) {
2550 		m_freem(m);
2551 		return (error);
2552 	}
2553 
2554 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2555 	    0, pkt->pkt_map->dm_mapsize,
2556 	    BUS_DMASYNC_PREREAD);
2557 	pkt->pkt_m = m;
2558 
2559 	memset(desc, 0, sizeof(*desc));
2560 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2561 
2562 	return (0);
2563 }
2564 
2565 /*********************************************************************
2566  *
2567  *  Allocate memory for rx_buffer structures. Since we use one
2568  *  rx_buffer per received packet, the maximum number of rx_buffers
2569  *  that we'll need is equal to the number of receive descriptors
2570  *  that we've allocated.
2571  *
2572  **********************************************************************/
2573 int
2574 em_allocate_receive_structures(struct em_softc *sc)
2575 {
2576 	struct em_queue *que;
2577 	struct em_packet *pkt;
2578 	int i;
2579 	int error;
2580 
2581 	FOREACH_QUEUE(sc, que) {
2582 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2583 		    sizeof(*que->rx.sc_rx_pkts_ring),
2584 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2585 		if (que->rx.sc_rx_pkts_ring == NULL) {
2586 			printf("%s: Unable to allocate rx_buffer memory\n",
2587 			    DEVNAME(sc));
2588 			return (ENOMEM);
2589 		}
2590 
2591 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2592 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2593 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2594 
2595 		for (i = 0; i < sc->sc_rx_slots; i++) {
2596 			pkt = &que->rx.sc_rx_pkts_ring[i];
2597 
2598 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2599 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2600 			if (error != 0) {
2601 				printf("%s: em_allocate_receive_structures: "
2602 				    "bus_dmamap_create failed; error %u\n",
2603 				    DEVNAME(sc), error);
2604 				goto fail;
2605 			}
2606 
2607 			pkt->pkt_m = NULL;
2608 		}
2609 	}
2610 
2611 	return (0);
2612 
2613 fail:
2614 	em_free_receive_structures(sc);
2615 	return (error);
2616 }
2617 
2618 /*********************************************************************
2619  *
2620  *  Allocate and initialize receive structures.
2621  *
2622  **********************************************************************/
2623 int
2624 em_setup_receive_structures(struct em_softc *sc)
2625 {
2626 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2627 	struct em_queue *que;
2628 	u_int lwm;
2629 
2630 	if (em_allocate_receive_structures(sc))
2631 		return (ENOMEM);
2632 
2633 	FOREACH_QUEUE(sc, que) {
2634 		memset(que->rx.sc_rx_desc_ring, 0,
2635 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2636 
2637 		/* Setup our descriptor pointers */
2638 		que->rx.sc_rx_desc_tail = 0;
2639 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2640 
2641 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2642 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2643 
2644 		if (em_rxfill(que) == 0) {
2645 			printf("%s: unable to fill any rx descriptors\n",
2646 			    DEVNAME(sc));
2647 			return (ENOMEM);
2648 		}
2649 	}
2650 
2651 	return (0);
2652 }
2653 
2654 /*********************************************************************
2655  *
2656  *  Enable receive unit.
2657  *
2658  **********************************************************************/
2659 void
2660 em_initialize_receive_unit(struct em_softc *sc)
2661 {
2662 	struct em_queue *que;
2663 	u_int32_t	reg_rctl;
2664 	u_int32_t	reg_rxcsum;
2665 	u_int32_t	reg_srrctl;
2666 	u_int64_t	bus_addr;
2667 
2668 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2669 
2670 	/* Make sure receives are disabled while setting up the descriptor ring */
2671 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2672 
2673 	/* Set the Receive Delay Timer Register */
2674 	E1000_WRITE_REG(&sc->hw, RDTR,
2675 			sc->rx_int_delay | E1000_RDT_FPDB);
2676 
2677 	if (sc->hw.mac_type >= em_82540) {
2678 		if (sc->rx_int_delay)
2679 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2680 
2681 		/* Set the interrupt throttling rate.  Value is calculated
2682 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
2683 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
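		/*
		 * For example, if MAX_INTS_PER_SEC were 8000 (its value is
		 * defined elsewhere, not in this file), DEFAULT_ITR would be
		 * 1 / (8000 * 256ns) ~= 488 throttle units of 256ns each.
		 */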
2684 	}
2685 
2686 	/* Setup the Receive Control Register */
2687 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2688 	    E1000_RCTL_RDMTS_HALF |
2689 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2690 
2691 	if (sc->hw.tbi_compatibility_on == TRUE)
2692 		reg_rctl |= E1000_RCTL_SBP;
2693 
2694 	/*
2695 	 * The i350 has a bug where it always strips the CRC whether
2696 	 * asked to or not.  So ask for stripped CRC here and
2697 	 * cope in rxeof.
2698 	 */
2699 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2700 		reg_rctl |= E1000_RCTL_SECRC;
2701 
2702 	switch (sc->sc_rx_buffer_len) {
2703 	default:
2704 	case EM_RXBUFFER_2048:
2705 		reg_rctl |= E1000_RCTL_SZ_2048;
2706 		break;
2707 	case EM_RXBUFFER_4096:
2708 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2709 		break;
2710 	case EM_RXBUFFER_8192:
2711 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2712 		break;
2713 	case EM_RXBUFFER_16384:
2714 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2715 		break;
2716 	}
2717 
2718 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2719 		reg_rctl |= E1000_RCTL_LPE;
2720 
2721 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2722 	if (sc->hw.mac_type >= em_82543) {
2723 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2724 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2725 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2726 	}
2727 
2728 	/*
2729 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573,
2730 	 * such as the Lenovo X60, long latencies are observed.
2731 	 */
2732 	if (sc->hw.mac_type == em_82573)
2733 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2734 
2735 	FOREACH_QUEUE(sc, que) {
2736 		if (sc->num_queues > 1) {
2737 			/*
2738 			 * Disable Drop Enable for every queue; the default
2739 			 * has it enabled for queues > 0.
2740 			 */
2741 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2742 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2743 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2744 		}
2745 
2746 		/* Setup the Base and Length of the Rx Descriptor Ring */
2747 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2748 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2749 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2750 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2751 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2752 
2753 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2754 		    sc->hw.mac_type == em_82576 ||
2755 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2756 			/* 82575/6 need to enable the RX queue */
2757 			uint32_t reg;
2758 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2759 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2760 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2761 		}
2762 	}
2763 
2764 	/* Enable Receives */
2765 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2766 
2767 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2768 	FOREACH_QUEUE(sc, que) {
2769 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2770 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2771 	}
2772 }
2773 
2774 /*********************************************************************
2775  *
2776  *  Free receive related data structures.
2777  *
2778  **********************************************************************/
2779 void
2780 em_free_receive_structures(struct em_softc *sc)
2781 {
2782 	struct em_queue *que;
2783 	struct em_packet *pkt;
2784 	int i;
2785 
2786 	INIT_DEBUGOUT("free_receive_structures: begin");
2787 
2788 	FOREACH_QUEUE(sc, que) {
2789 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2790 
2791 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2792 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2793 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2794 
2795 		if (que->rx.sc_rx_pkts_ring != NULL) {
2796 			for (i = 0; i < sc->sc_rx_slots; i++) {
2797 				pkt = &que->rx.sc_rx_pkts_ring[i];
2798 				if (pkt->pkt_m != NULL) {
2799 					bus_dmamap_sync(sc->sc_dmat,
2800 					    pkt->pkt_map,
2801 					    0, pkt->pkt_map->dm_mapsize,
2802 					    BUS_DMASYNC_POSTREAD);
2803 					bus_dmamap_unload(sc->sc_dmat,
2804 					    pkt->pkt_map);
2805 					m_freem(pkt->pkt_m);
2806 					pkt->pkt_m = NULL;
2807 				}
2808 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2809 			}
2810 
2811 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2812 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2813 			que->rx.sc_rx_pkts_ring = NULL;
2814 		}
2815 
2816 		if (que->rx.fmp != NULL) {
2817 			m_freem(que->rx.fmp);
2818 			que->rx.fmp = NULL;
2819 			que->rx.lmp = NULL;
2820 		}
2821 	}
2822 }
2823 
2824 int
2825 em_rxfill(struct em_queue *que)
2826 {
2827 	struct em_softc *sc = que->sc;
2828 	u_int slots;
2829 	int post = 0;
2830 	int i;
2831 
2832 	i = que->rx.sc_rx_desc_head;
2833 
2834 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2835 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2836 	    BUS_DMASYNC_POSTWRITE);
2837 
2838 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2839 	    slots > 0; slots--) {
2840 		if (++i == sc->sc_rx_slots)
2841 			i = 0;
2842 
2843 		if (em_get_buf(que, i) != 0)
2844 			break;
2845 
2846 		que->rx.sc_rx_desc_head = i;
2847 		post = 1;
2848 	}
2849 
2850 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2851 
2852 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2853 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2854 	    BUS_DMASYNC_PREWRITE);
2855 
2856 	return (post);
2857 }
2858 
2859 void
2860 em_rxrefill(void *arg)
2861 {
2862 	struct em_queue *que = arg;
2863 	struct em_softc *sc = que->sc;
2864 
2865 	if (em_rxfill(que))
2866 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2867 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2868 		timeout_add(&que->rx_refill, 1);
2869 }
2870 
2871 /*********************************************************************
2872  *
2873  *  This routine executes in interrupt context. It replenishes
2874  *  the mbufs in the descriptor ring and sends data which has been
2875  *  DMA'ed into host memory to the upper layer.
2876  *
2877  *********************************************************************/
2878 int
2879 em_rxeof(struct em_queue *que)
2880 {
2881 	struct em_softc	    *sc = que->sc;
2882 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2883 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2884 	struct mbuf	    *m;
2885 	u_int8_t	    accept_frame = 0;
2886 	u_int8_t	    eop = 0;
2887 	u_int16_t	    len, desc_len, prev_len_adj;
2888 	int		    i, rv = 0;
2889 
2890 	/* Pointer to the receive descriptor being examined. */
2891 	struct em_rx_desc   *desc;
2892 	struct em_packet    *pkt;
2893 	u_int8_t	    status;
2894 
2895 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2896 		return (0);
2897 
2898 	i = que->rx.sc_rx_desc_tail;
2899 
2900 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2901 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2902 	    BUS_DMASYNC_POSTREAD);
2903 
2904 	do {
2905 		m = NULL;
2906 
2907 		pkt = &que->rx.sc_rx_pkts_ring[i];
2908 		desc = &que->rx.sc_rx_desc_ring[i];
2909 
2910 		status = desc->status;
2911 		if (!ISSET(status, E1000_RXD_STAT_DD))
2912 			break;
2913 
2914 		/* pull the mbuf off the ring */
2915 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2916 		    0, pkt->pkt_map->dm_mapsize,
2917 		    BUS_DMASYNC_POSTREAD);
2918 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2919 		m = pkt->pkt_m;
2920 		pkt->pkt_m = NULL;
2921 
2922 		KASSERT(m != NULL);
2923 
2924 		if_rxr_put(&que->rx.sc_rx_ring, 1);
2925 		rv = 1;
2926 
2927 		accept_frame = 1;
2928 		prev_len_adj = 0;
2929 		desc_len = letoh16(desc->length);
2930 
2931 		if (status & E1000_RXD_STAT_EOP) {
2932 			eop = 1;
2933 			if (desc_len < ETHER_CRC_LEN) {
2934 				len = 0;
2935 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2936 			} else if (sc->hw.mac_type == em_i210 ||
2937 			    sc->hw.mac_type == em_i350)
2938 				len = desc_len;
2939 			else
2940 				len = desc_len - ETHER_CRC_LEN;
2941 		} else {
2942 			eop = 0;
2943 			len = desc_len;
2944 		}
2945 
2946 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2947 			u_int8_t last_byte;
2948 			u_int32_t pkt_len = desc_len;
2949 
2950 			if (que->rx.fmp != NULL)
2951 				pkt_len += que->rx.fmp->m_pkthdr.len;
2952 
2953 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
2954 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
2955 			    pkt_len, last_byte)) {
2956 #if NKSTAT > 0
2957 				em_tbi_adjust_stats(sc,
2958 				    pkt_len, sc->hw.mac_addr);
2959 #endif
2960 				if (len > 0)
2961 					len--;
2962 			} else
2963 				accept_frame = 0;
2964 		}
2965 
2966 		if (accept_frame) {
2967 			/* Assign correct length to the current fragment */
2968 			m->m_len = len;
2969 
2970 			if (que->rx.fmp == NULL) {
2971 				m->m_pkthdr.len = m->m_len;
2972 				que->rx.fmp = m;	 /* Store the first mbuf */
2973 				que->rx.lmp = m;
2974 			} else {
2975 				/* Chain mbuf's together */
2976 				m->m_flags &= ~M_PKTHDR;
2977 				/*
2978 				 * Adjust length of previous mbuf in chain if
2979 				 * we received less than 4 bytes in the last
2980 				 * descriptor.
2981 				 */
2982 				if (prev_len_adj > 0) {
2983 					que->rx.lmp->m_len -= prev_len_adj;
2984 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
2985 				}
2986 				que->rx.lmp->m_next = m;
2987 				que->rx.lmp = m;
2988 				que->rx.fmp->m_pkthdr.len += m->m_len;
2989 			}
2990 
2991 			if (eop) {
2992 				m = que->rx.fmp;
2993 
2994 				em_receive_checksum(sc, desc, m);
2995 #if NVLAN > 0
2996 				if (desc->status & E1000_RXD_STAT_VP) {
2997 					m->m_pkthdr.ether_vtag =
2998 					    letoh16(desc->special);
2999 					m->m_flags |= M_VLANTAG;
3000 				}
3001 #endif
3002 				ml_enqueue(&ml, m);
3003 
3004 				que->rx.fmp = NULL;
3005 				que->rx.lmp = NULL;
3006 			}
3007 		} else {
3008 			que->rx.dropped_pkts++;
3009 
3010 			if (que->rx.fmp != NULL) {
3011 				m_freem(que->rx.fmp);
3012 				que->rx.fmp = NULL;
3013 				que->rx.lmp = NULL;
3014 			}
3015 
3016 			m_freem(m);
3017 		}
3018 
3019 		/* Advance our pointers to the next descriptor. */
3020 		if (++i == sc->sc_rx_slots)
3021 			i = 0;
3022 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3023 
3024 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3025 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3026 	    BUS_DMASYNC_PREREAD);
3027 
3028 	que->rx.sc_rx_desc_tail = i;
3029 
3030 	if (ifiq_input(&ifp->if_rcv, &ml))
3031 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3032 
3033 	return (rv);
3034 }
3035 
3036 /*********************************************************************
3037  *
3038  *  Verify that the hardware indicated that the checksum is valid.
3039  *  Inform the stack about the status of the checksum so that the
3040  *  stack doesn't spend time verifying it.
3041  *
3042  *********************************************************************/
3043 void
3044 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3045     struct mbuf *mp)
3046 {
3047 	/* 82543 or newer only */
3048 	if ((sc->hw.mac_type < em_82543) ||
3049 	    /* Ignore Checksum bit is set */
3050 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3051 		mp->m_pkthdr.csum_flags = 0;
3052 		return;
3053 	}
3054 
3055 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3056 		/* Did it pass? */
3057 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3058 			/* IP Checksum Good */
3059 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3060 
3061 		} else
3062 			mp->m_pkthdr.csum_flags = 0;
3063 	}
3064 
3065 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3066 		/* Did it pass? */
3067 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3068 			mp->m_pkthdr.csum_flags |=
3069 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3070 	}
3071 }
3072 
3073 /*
3074  * This turns on the hardware offload of VLAN
3075  * tag insertion and stripping.
3076  */
3077 void
3078 em_enable_hw_vlans(struct em_softc *sc)
3079 {
3080 	uint32_t ctrl;
3081 
3082 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3083 	ctrl |= E1000_CTRL_VME;
3084 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3085 }
3086 
3087 void
3088 em_enable_intr(struct em_softc *sc)
3089 {
3090 	uint32_t mask;
3091 
3092 	if (sc->msix) {
3093 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3094 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3095 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3096 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3097 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3098 	} else
3099 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3100 }
3101 
3102 void
3103 em_disable_intr(struct em_softc *sc)
3104 {
3105 	/*
3106 	 * The first version of 82542 had an erratum where, when link
3107 	 * was forced, it would stay up even if the cable was disconnected.
3108 	 * Sequence errors were used to detect the disconnect and then
3109 	 * the driver would unforce the link.  This code is in the ISR.
3110 	 * For this to work correctly the Sequence error interrupt had
3111 	 * to be enabled all the time.
3112 	 */
3113 	if (sc->msix) {
3114 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3115 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3116 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3117 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3118 	else
3119 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3120 }
3121 
3122 void
3123 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3124 {
3125 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3126 	pcireg_t val;
3127 
3128 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3129 	if (reg & 0x2) {
3130 		val &= 0x0000ffff;
3131 		val |= (*value << 16);
3132 	} else {
3133 		val &= 0xffff0000;
3134 		val |= *value;
3135 	}
3136 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3137 }
3138 
3139 void
3140 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3141 {
3142 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3143 	pcireg_t val;
3144 
3145 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3146 	if (reg & 0x2)
3147 		*value = (val >> 16) & 0xffff;
3148 	else
3149 		*value = val & 0xffff;
3150 }
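/*
 * Example of the word addressing used above (offsets illustrative): a
 * 16-bit read at config offset 0x0a fetches the 32-bit dword at 0x08 and
 * returns its upper half, while offset 0x08 returns the lower half; the
 * write path merges the 16-bit value into the matching half before writing
 * the dword back.
 */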
3151 
3152 void
3153 em_pci_set_mwi(struct em_hw *hw)
3154 {
3155 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3156 
3157 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3158 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3159 }
3160 
3161 void
3162 em_pci_clear_mwi(struct em_hw *hw)
3163 {
3164 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3165 
3166 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3167 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3168 }
3169 
3170 /*
3171  * We may eventually really do this, but it's unnecessary
3172  * for now, so we just return unsupported.
3173  */
3174 int32_t
3175 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3176 {
3177 	return -E1000_NOT_IMPLEMENTED;
3178 }
3179 
3180 /*********************************************************************
3181 * 82544 Coexistence issue workaround.
3182 *    There are 2 issues.
3183 *       1. Transmit Hang issue.
3184 *    To detect this issue, the following equation can be used:
3185 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3186 *          If SUM[3:0] is between 1 and 4, we will have this issue.
3187 *
3188 *       2. DAC issue.
3189 *    To detect this issue, the following equation can be used:
3190 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3191 *          If SUM[3:0] is between 9 and c, we will have this issue.
3192 *
3193 *
3194 *    WORKAROUND:
3195 *          Make sure the ending address is not 1,2,3,4 (Hang) or 9,a,b,c (DAC).
3196 *
3197 **********************************************************************/
3198 u_int32_t
3199 em_fill_descriptors(u_int64_t address, u_int32_t length,
3200     PDESC_ARRAY desc_array)
3201 {
3202 	/* The issue is sensitive to both length and address, */
3203 	/* so first check the address... */
3204 	u_int32_t safe_terminator;
3205 	if (length <= 4) {
3206 		desc_array->descriptor[0].address = address;
3207 		desc_array->descriptor[0].length = length;
3208 		desc_array->elements = 1;
3209 		return desc_array->elements;
3210 	}
3211 	safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3212 	/* If it does not fall in 0x1-0x4 or 0x9-0xC, a single descriptor is safe. */
3213 	if (safe_terminator == 0 ||
3214 	    (safe_terminator > 4 &&
3215 	    safe_terminator < 9) ||
3216 	    (safe_terminator > 0xC &&
3217 	    safe_terminator <= 0xF)) {
3218 		desc_array->descriptor[0].address = address;
3219 		desc_array->descriptor[0].length = length;
3220 		desc_array->elements = 1;
3221 		return desc_array->elements;
3222 	}
3223 
3224 	desc_array->descriptor[0].address = address;
3225 	desc_array->descriptor[0].length = length - 4;
3226 	desc_array->descriptor[1].address = address + (length - 4);
3227 	desc_array->descriptor[1].length = 4;
3228 	desc_array->elements = 2;
3229 	return desc_array->elements;
3230 }
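/*
 * Worked example of the detection arithmetic above (values illustrative):
 * a buffer at an address ending in 0x6 with length 0x3e gives
 *     safe_terminator = ((0x6 & 0x7) + (0x3e & 0xf)) & 0xf = 0x4
 * which falls in the 1..4 (Hang) range, so the function returns two
 * descriptors: one of length - 4 bytes plus a trailing 4-byte descriptor.
 * An address ending in 0x0 with length 0x40 gives safe_terminator 0 and is
 * passed through as a single descriptor.
 */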
3231 
3232 /*
3233  * Disable the L0S and L1 LINK states.
3234  */
3235 void
3236 em_disable_aspm(struct em_softc *sc)
3237 {
3238 	int offset;
3239 	pcireg_t val;
3240 
3241 	switch (sc->hw.mac_type) {
3242 		case em_82571:
3243 		case em_82572:
3244 		case em_82573:
3245 		case em_82574:
3246 			break;
3247 		default:
3248 			return;
3249 	}
3250 
3251 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3252 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3253 		return;
3254 
3255 	/* Disable PCIe Active State Power Management (ASPM). */
3256 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3257 	    offset + PCI_PCIE_LCSR);
3258 
3259 	switch (sc->hw.mac_type) {
3260 		case em_82571:
3261 		case em_82572:
3262 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3263 			break;
3264 		case em_82573:
3265 		case em_82574:
3266 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3267 			    PCI_PCIE_LCSR_ASPM_L1);
3268 			break;
3269 		default:
3270 			break;
3271 	}
3272 
3273 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3274 	    offset + PCI_PCIE_LCSR, val);
3275 }
3276 
3277 /*
3278  * em_flush_tx_ring - remove all descriptors from the tx_ring
3279  *
3280  * We want to clear all pending descriptors from the TX ring.
3281  * zeroing happens when the HW reads the regs. We assign the ring itself as
3282  * Zeroing happens when the HW reads the regs. We assign the ring itself as
3283  * the data of the next descriptor. We don't care about the data; we are about
3284  * to reset the HW.
3285 void
3286 em_flush_tx_ring(struct em_queue *que)
3287 {
3288 	struct em_softc		*sc = que->sc;
3289 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3290 	uint16_t		 size = 512;
3291 	struct em_tx_desc	*txd;
3292 
3293 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3294 
3295 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3296 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3297 
3298 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3299 
3300 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3301 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3302 	txd->lower.data = htole32(txd_lower | size);
3303 	txd->upper.data = 0;
3304 
3305 	/* flush descriptors to memory before notifying the HW */
3306 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3307 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3308 
3309 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3310 		que->tx.sc_tx_desc_head = 0;
3311 
3312 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3313 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3314 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3315 	usec_delay(250);
3316 }
3317 
3318 /*
3319  * em_flush_rx_ring - remove all descriptors from the rx_ring
3320  *
3321  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3322  */
3323 void
3324 em_flush_rx_ring(struct em_queue *que)
3325 {
3326 	uint32_t	rctl, rxdctl;
3327 	struct em_softc	*sc = que->sc;
3328 
3329 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3330 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3331 	E1000_WRITE_FLUSH(&sc->hw);
3332 	usec_delay(150);
3333 
3334 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3335 	/* zero the lower 14 bits (prefetch and host thresholds) */
3336 	rxdctl &= 0xffffc000;
3337 	/*
3338 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3339 	 * and make sure the granularity is "descriptors" and not "cache lines"
3340 	 */
3341 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3342 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3343 
3344 	/* momentarily enable the RX ring for the changes to take effect */
3345 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3346 	E1000_WRITE_FLUSH(&sc->hw);
3347 	usec_delay(150);
3348 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3349 }
3350 
3351 /*
3352  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3353  *
3354  * In i219, the descriptor rings must be emptied before resetting the HW
3355  * or before changing the device state to D3 during runtime (runtime PM).
3356  *
3357  * Failure to do this will cause the HW to enter a unit hang state which can
3358  * only be released by a PCI reset on the device.
3359  *
3360  */
3361 void
3362 em_flush_desc_rings(struct em_softc *sc)
3363 {
3364 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3365 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3366 	uint32_t		 fextnvm11, tdlen;
3367 	uint16_t		 hang_state;
3368 
3369 	/* First, disable MULR fix in FEXTNVM11 */
3370 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3371 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3372 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3373 
3374 	/* Do nothing if we're not in a faulty state, or if the queue is empty. */
3375 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3376 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3377 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3378 		return;
3379 	em_flush_tx_ring(que);
3380 
3381 	/* recheck, maybe the fault is caused by the rx ring */
3382 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3383 	if (hang_state & FLUSH_DESC_REQUIRED)
3384 		em_flush_rx_ring(que);
3385 }
3386 
3387 int
3388 em_allocate_legacy(struct em_softc *sc)
3389 {
3390 	pci_intr_handle_t	 ih;
3391 	const char		*intrstr = NULL;
3392 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3393 	pci_chipset_tag_t	 pc = pa->pa_pc;
3394 
3395 	if (pci_intr_map_msi(pa, &ih)) {
3396 		if (pci_intr_map(pa, &ih)) {
3397 			printf(": couldn't map interrupt\n");
3398 			return (ENXIO);
3399 		}
3400 		sc->legacy_irq = 1;
3401 	}
3402 
3403 	intrstr = pci_intr_string(pc, ih);
3404 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3405 	    em_intr, sc, DEVNAME(sc));
3406 	if (sc->sc_intrhand == NULL) {
3407 		printf(": couldn't establish interrupt");
3408 		if (intrstr != NULL)
3409 			printf(" at %s", intrstr);
3410 		printf("\n");
3411 		return (ENXIO);
3412 	}
3413 	printf(": %s", intrstr);
3414 
3415 	return (0);
3416 }
3417 
3418 #if NKSTAT > 0
3419 /* this is used to look up the array of kstats quickly */
3420 enum em_stat {
3421 	em_stat_crcerrs,
3422 	em_stat_algnerrc,
3423 	em_stat_symerrs,
3424 	em_stat_rxerrc,
3425 	em_stat_mpc,
3426 	em_stat_scc,
3427 	em_stat_ecol,
3428 	em_stat_mcc,
3429 	em_stat_latecol,
3430 	em_stat_colc,
3431 	em_stat_dc,
3432 	em_stat_tncrs,
3433 	em_stat_sec,
3434 	em_stat_cexterr,
3435 	em_stat_rlec,
3436 	em_stat_xonrxc,
3437 	em_stat_xontxc,
3438 	em_stat_xoffrxc,
3439 	em_stat_xofftxc,
3440 	em_stat_fcruc,
3441 	em_stat_prc64,
3442 	em_stat_prc127,
3443 	em_stat_prc255,
3444 	em_stat_prc511,
3445 	em_stat_prc1023,
3446 	em_stat_prc1522,
3447 	em_stat_gprc,
3448 	em_stat_bprc,
3449 	em_stat_mprc,
3450 	em_stat_gptc,
3451 	em_stat_gorc,
3452 	em_stat_gotc,
3453 	em_stat_rnbc,
3454 	em_stat_ruc,
3455 	em_stat_rfc,
3456 	em_stat_roc,
3457 	em_stat_rjc,
3458 	em_stat_mgtprc,
3459 	em_stat_mgtpdc,
3460 	em_stat_mgtptc,
3461 	em_stat_tor,
3462 	em_stat_tot,
3463 	em_stat_tpr,
3464 	em_stat_tpt,
3465 	em_stat_ptc64,
3466 	em_stat_ptc127,
3467 	em_stat_ptc255,
3468 	em_stat_ptc511,
3469 	em_stat_ptc1023,
3470 	em_stat_ptc1522,
3471 	em_stat_mptc,
3472 	em_stat_bptc,
3473 #if 0
3474 	em_stat_tsctc,
3475 	em_stat_tsctf,
3476 #endif
3477 
3478 	em_stat_count,
3479 };
3480 
3481 struct em_counter {
3482 	const char		*name;
3483 	enum kstat_kv_unit	 unit;
3484 	uint32_t		 reg;
3485 };
3486 
3487 static const struct em_counter em_counters[em_stat_count] = {
3488 	[em_stat_crcerrs] =
3489 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3490 	[em_stat_algnerrc] = /* >= em_82543 */
3491 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3492 	[em_stat_symerrs] = /* >= em_82543 */
3493 	    { "rx symbol errs",	KSTAT_KV_U_PACKETS,	0 },
3494 	[em_stat_rxerrc] =
3495 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3496 	[em_stat_mpc] =
3497 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3498 	[em_stat_scc] =
3499 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3500 	[em_stat_ecol] =
3501 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3502 	[em_stat_mcc] =
3503 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3504 	[em_stat_latecol] =
3505 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3506 	[em_stat_colc] =
3507 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3508 	[em_stat_dc] =
3509 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3510 	[em_stat_tncrs] = /* >= em_82543 */
3511 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3512 	[em_stat_sec] =
3513 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3514 	[em_stat_cexterr] = /* >= em_82543 */
3515 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3516 	[em_stat_rlec] =
3517 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3518 	[em_stat_xonrxc] =
3519 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3520 	[em_stat_xontxc] =
3521 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3522 	[em_stat_xoffrxc] =
3523 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3524 	[em_stat_xofftxc] =
3525 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3526 	[em_stat_fcruc] =
3527 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3528 	[em_stat_prc64] =
3529 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3530 	[em_stat_prc127] =
3531 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3532 	[em_stat_prc255] =
3533 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3534 	[em_stat_prc511] =
3535 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3536 	[em_stat_prc1023] =
3537 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3538 	[em_stat_prc1522] =
3539 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3540 	[em_stat_gprc] =
3541 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3542 	[em_stat_bprc] =
3543 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3544 	[em_stat_mprc] =
3545 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3546 	[em_stat_gptc] =
3547 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3548 	[em_stat_gorc] = /* 64bit */
3549 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3550 	[em_stat_gotc] = /* 64bit */
3551 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3552 	[em_stat_rnbc] =
3553 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3554 	[em_stat_ruc] =
3555 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3556 	[em_stat_rfc] =
3557 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3558 	[em_stat_roc] =
3559 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3560 	[em_stat_rjc] =
3561 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3562 	[em_stat_mgtprc] =
3563 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3564 	[em_stat_mgtpdc] =
3565 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3566 	[em_stat_mgtptc] =
3567 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3568 	[em_stat_tor] = /* 64bit */
3569 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3570 	[em_stat_tot] = /* 64bit */
3571 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3572 	[em_stat_tpr] =
3573 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3574 	[em_stat_tpt] =
3575 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3576 	[em_stat_ptc64] =
3577 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3578 	[em_stat_ptc127] =
3579 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3580 	[em_stat_ptc255] =
3581 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3582 	[em_stat_ptc511] =
3583 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3584 	[em_stat_ptc1023] =
3585 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3586 	[em_stat_ptc1522] =
3587 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3588 	[em_stat_mptc] =
3589 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3590 	[em_stat_bptc] =
3591 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3592 };
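/*
 * Entries with a register offset of 0 are the ones em_kstat_read() below
 * handles by hand: the 64-bit byte counters (gorc, gotc, tor, tot) and
 * the counters that only exist on em_82543 and newer parts.
 */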
3593 
3594 /**********************************************************************
3595  *
3596  *  Update the board statistics counters.
3597  *
3598  **********************************************************************/
3599 int
3600 em_kstat_read(struct kstat *ks)
3601 {
3602 	struct em_softc *sc = ks->ks_softc;
3603 	struct em_hw *hw = &sc->hw;
3604 	struct kstat_kv *kvs = ks->ks_data;
3605 	uint32_t lo, hi;
3606 	unsigned int i;
3607 
3608 	for (i = 0; i < nitems(em_counters); i++) {
3609 		const struct em_counter *c = &em_counters[i];
3610 		if (c->reg == 0)
3611 			continue;
3612 
3613 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3614 		    E1000_REG_TR(hw, c->reg)); /* offset translated per mac type */
3615 	}
3616 
3617 	/* Handle the exceptions. */
3618 
3619 	if (sc->hw.mac_type >= em_82543) {
3620 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3621 		    E1000_READ_REG(hw, ALGNERRC);
3622 		kstat_kv_u64(&kvs[em_stat_symerrs]) +=
3623 		    E1000_READ_REG(hw, SYMERRS);
3624 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3625 		    E1000_READ_REG(hw, CEXTERR);
3626 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3627 		    E1000_READ_REG(hw, TNCRS);
3628 #if 0
3629 		sc->stats.tsctc +=
3630 		E1000_READ_REG(hw, TSCTC);
3631 		sc->stats.tsctfc +=
3632 		E1000_READ_REG(hw, TSCTFC);
3633 #endif
3634 	}
3635 
3636 	/* For the 64-bit byte counters the low dword must be read first. */
3637 	/* Both registers clear on the read of the high dword. */
3638 
3639 	lo = E1000_READ_REG(hw, GORCL);
3640 	hi = E1000_READ_REG(hw, GORCH);
3641 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3642 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3643 
3644 	lo = E1000_READ_REG(hw, GOTCL);
3645 	hi = E1000_READ_REG(hw, GOTCH);
3646 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3647 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3648 
3649 	lo = E1000_READ_REG(hw, TORL);
3650 	hi = E1000_READ_REG(hw, TORH);
3651 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3652 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3653 
3654 	lo = E1000_READ_REG(hw, TOTL);
3655 	hi = E1000_READ_REG(hw, TOTH);
3656 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3657 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3658 
3659 	getnanouptime(&ks->ks_updated);
3660 
3661 	return (0);
3662 }
3663 
3664 void
3665 em_kstat_attach(struct em_softc *sc)
3666 {
3667 	struct kstat *ks;
3668 	struct kstat_kv *kvs;
3669 	unsigned int i;
3670 
3671 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3672 
3673 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3674 	    KSTAT_T_KV, 0);
3675 	if (ks == NULL)
3676 		return;
3677 
3678 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3679 	    M_DEVBUF, M_WAITOK|M_ZERO);
3680 	for (i = 0; i < nitems(em_counters); i++) {
3681 		const struct em_counter *c = &em_counters[i];
3682 		kstat_kv_unit_init(&kvs[i], c->name,
3683 		    KSTAT_KV_T_COUNTER64, c->unit);
3684 	}
3685 
3686 	ks->ks_softc = sc;
3687 	ks->ks_data = kvs;
3688 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3689 	ks->ks_read = em_kstat_read;
3690 	kstat_set_mutex(ks, &sc->kstat_mtx);
3691 
3692 	kstat_install(ks);
3693 }
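/*
 * Once installed, the "em-stats" kstat is published under the device name
 * and can be dumped from userland with kstat(1); see the kstat(1) manpage
 * for the selector syntax.
 */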
3694 
3695 /******************************************************************************
3696  * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3697  *****************************************************************************/
3698 void
3699 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3700 {
3701 	struct em_hw *hw = &sc->hw;
3702 	struct kstat *ks = sc->kstat;
3703 	struct kstat_kv *kvs;
3704 
3705 	if (ks == NULL)
3706 		return;
3707 
3708 	/* First adjust the frame length. */
3709 	frame_len--;
3710 
3711 	mtx_enter(&sc->kstat_mtx);
3712 	kvs = ks->ks_data;
3713 
3714 	/*
3715 	 * We need to adjust the statistics counters, since the hardware
3716 	 * counters overcount this packet as a CRC error and undercount the
3717 	 * packet as a good packet
3718 	 */
3719 
3720 	/* This packet should not be counted as a CRC error.	*/
3721 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3722 	/* This packet does count as a Good Packet Received.	*/
3723 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3724 
3725 	/* Adjust the Good Octets received counters		*/
3726 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3727 
3728 	/*
3729 	 * Is this a broadcast or multicast?  Check broadcast first, since
3730 	 * the test for a multicast frame will test positive on a broadcast
3731 	 * frame.
3732 	 */
3733 	if (ETHER_IS_BROADCAST(mac_addr)) {
3734 		/* Broadcast packet */
3735 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3736 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3737 		/* Multicast packet */
3738 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3739 	}
3740 
3741 	if (frame_len == hw->max_frame_size) {
3742 		/*
3743 		 * In this case, the hardware has overcounted the number of
3744 		 * oversize frames.
3745 		 */
3746 		kstat_kv_u64(&kvs[em_stat_roc])--;
3747 	}
3748 
3749 	/*
3750 	 * Adjust the bin counters when the extra byte put the frame in the
3751 	 * wrong bin. Remember that the frame_len was adjusted above.
3752 	 */
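	/*
	 * Worked example: a frame whose true length is 64 bytes arrives with
	 * the extra trailing byte, so the hardware counted 65 bytes and
	 * bumped prc127; after the frame_len-- above we take the first
	 * branch and move that count back into prc64.
	 */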
3753 	if (frame_len == 64) {
3754 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3755 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3756 	} else if (frame_len == 127) {
3757 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3758 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3759 	} else if (frame_len == 255) {
3760 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3761 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3762 	} else if (frame_len == 511) {
3763 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3764 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3765 	} else if (frame_len == 1023) {
3766 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3767 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3768 	} else if (frame_len == 1522) {
3769 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3770 	}
3771 
3772 	mtx_leave(&sc->kstat_mtx);
3773 }
3774 #endif /* NKSTAT > 0 */
3775 
3776 #ifndef SMALL_KERNEL
3777 int
3778 em_allocate_msix(struct em_softc *sc)
3779 {
3780 	pci_intr_handle_t	 ih;
3781 	const char		*intrstr = NULL;
3782 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3783 	pci_chipset_tag_t	 pc = pa->pa_pc;
3784 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3785 	int			 vec;
3786 
3787 	if (!em_enable_msix)
3788 		return (ENODEV);
3789 
3790 	switch (sc->hw.mac_type) {
3791 	case em_82576:
3792 	case em_82580:
3793 	case em_i350:
3794 	case em_i210:
3795 		break;
3796 	default:
3797 		return (ENODEV);
3798 	}
3799 
3800 	vec = 0;
3801 	if (pci_intr_map_msix(pa, vec, &ih))
3802 		return (ENODEV);
3803 	sc->msix = 1;
3804 
3805 	que->me = vec;
3806 	que->eims = 1 << vec;
3807 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3808 
3809 	intrstr = pci_intr_string(pc, ih);
3810 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3811 	    em_queue_intr_msix, que, que->name);
3812 	if (que->tag == NULL) {
3813 		printf(": couldn't establish interrupt");
3814 		if (intrstr != NULL)
3815 			printf(" at %s", intrstr);
3816 		printf("\n");
3817 		return (ENXIO);
3818 	}
3819 
3820 	/* Set up the link vector; use the last queue vector + 1 */
3821 	vec++;
3822 	sc->msix_linkvec = vec;
3823 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3824 		printf(": couldn't map link vector\n");
3825 		return (ENXIO);
3826 	}
3827 
3828 	intrstr = pci_intr_string(pc, ih);
3829 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3830 	    em_link_intr_msix, sc, DEVNAME(sc));
3831 	if (sc->sc_intrhand == NULL) {
3832 		printf(": couldn't establish interrupt");
3833 		if (intrstr != NULL)
3834 			printf(" at %s", intrstr);
3835 		printf("\n");
3836 		return (ENXIO);
3837 	}
3838 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3839 
3840 	return (0);
3841 }
3842 
3843 /*
3844  * Interrupt handler for a specific queue (not for link interrupts). The EICR
3845  * bit that maps to the EIMS bit covers both RX and TX, so we can't tell
3846  * whether this is an RX or a TX completion and must handle both. The bits
3847  * in EICR are auto-cleared and we _cannot_ read EICR.
3848  */
3849 int
3850 em_queue_intr_msix(void *vque)
3851 {
3852 	struct em_queue *que = vque;
3853 	struct em_softc *sc = que->sc;
3854 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3855 
3856 	if (ifp->if_flags & IFF_RUNNING) {
3857 		em_txeof(que);
3858 		if (em_rxeof(que))
3859 			em_rxrefill(que);
3860 	}
3861 
3862 	em_enable_queue_intr_msix(que);
3863 
3864 	return (1);
3865 }
3866 
3867 int
3868 em_link_intr_msix(void *arg)
3869 {
3870 	struct em_softc *sc = arg;
3871 	uint32_t icr;
3872 
3873 	icr = E1000_READ_REG(&sc->hw, ICR);
3874 
3875 	/* Link status change */
3876 	if (icr & E1000_ICR_LSC) {
3877 		KERNEL_LOCK();
3878 		sc->hw.get_link_status = 1;
3879 		em_check_for_link(&sc->hw);
3880 		em_update_link_status(sc);
3881 		KERNEL_UNLOCK();
3882 	}
3883 
3884 	/* Re-arm unconditionally */
3885 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3886 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3887 
3888 	return (1);
3889 }
3890 
3891 /*
3892  * Maps queues into msix interrupt vectors.
3893  */
3894 int
3895 em_setup_queues_msix(struct em_softc *sc)
3896 {
3897 	uint32_t ivar, newitr, index;
3898 	struct em_queue *que;
3899 
3900 	KASSERT(sc->msix);
3901 
3902 	/* First switch the chip into MSI-X multiple vector mode via GPIE */
3903 	if (sc->hw.mac_type != em_82575)
3904 		E1000_WRITE_REG(&sc->hw, GPIE,
3905 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3906 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3907 
3908 	/* Turn on MSIX */
3909 	switch (sc->hw.mac_type) {
3910 	case em_82580:
3911 	case em_i350:
3912 	case em_i210:
3913 		/* RX entries */
3914 		/*
3915 		 * Note: this maps queues into MSI-X vectors. The offset
3916 		 * calculation and the check whether que->me is odd follow the
3917 		 * IVAR register layout, where each 32-bit IVAR register holds
3918 		 * the entries for two queues; the datasheet explains it well.
3919 		 */
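		/*
		 * Worked example for this layout: for que->me == 3,
		 * index = 3 >> 1 = 1, and since 3 is odd the RX entry lands
		 * in bits 23:16 of IVAR1 (below) and the TX entry in bits
		 * 31:24 (in the second loop).
		 */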
3920 		FOREACH_QUEUE(sc, que) {
3921 			index = que->me >> 1;
3922 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3923 			if (que->me & 1) {
3924 				ivar &= 0xFF00FFFF;
3925 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3926 			} else {
3927 				ivar &= 0xFFFFFF00;
3928 				ivar |= que->me | E1000_IVAR_VALID;
3929 			}
3930 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3931 		}
3932 
3933 		/* TX entries */
3934 		FOREACH_QUEUE(sc, que) {
3935 			index = que->me >> 1;
3936 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3937 			if (que->me & 1) {
3938 				ivar &= 0x00FFFFFF;
3939 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3940 			} else {
3941 				ivar &= 0xFFFF00FF;
3942 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3943 			}
3944 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3945 			sc->msix_queuesmask |= que->eims;
3946 		}
3947 
3948 		/* And for the link interrupt */
3949 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3950 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3951 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3952 		break;
3953 	case em_82576:
3954 		/* RX entries */
3955 		FOREACH_QUEUE(sc, que) {
3956 			index = que->me & 0x7; /* Each IVAR has two entries */
3957 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3958 			if (que->me < 8) {
3959 				ivar &= 0xFFFFFF00;
3960 				ivar |= que->me | E1000_IVAR_VALID;
3961 			} else {
3962 				ivar &= 0xFF00FFFF;
3963 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3964 			}
3965 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3966 			sc->msix_queuesmask |= que->eims;
3967 		}
3968 		/* TX entries */
3969 		FOREACH_QUEUE(sc, que) {
3970 			index = que->me & 0x7; /* Each IVAR has two entries */
3971 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3972 			if (que->me < 8) {
3973 				ivar &= 0xFFFF00FF;
3974 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3975 			} else {
3976 				ivar &= 0x00FFFFFF;
3977 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3978 			}
3979 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3980 			sc->msix_queuesmask |= que->eims;
3981 		}
3982 
3983 		/* And for the link interrupt */
3984 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3985 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3986 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3987 		break;
3988 	default:
3989 		panic("unsupported mac");
3990 		break;
3991 	}
3992 
3993 	/* Set the starting interrupt rate */
3994 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
3995 
3996 	if (sc->hw.mac_type == em_82575)
3997 		newitr |= newitr << 16;
3998 	else
3999 		newitr |= E1000_EITR_CNT_IGNR;
4000 
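	/*
	 * Worked example, assuming MAX_INTS_PER_SEC is 8000 as defined in
	 * if_em.h and that EITR counts in 250ns units (which is what the
	 * 4000000 constant implies): newitr = 4000000 / 8000 = 500, i.e. an
	 * interrupt throttling interval of roughly 125us; the 0x7FFC mask
	 * presumably keeps the value within the EITR interval field.
	 */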
4001 	FOREACH_QUEUE(sc, que)
4002 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
4003 
4004 	return (0);
4005 }
4006 
4007 void
4008 em_enable_queue_intr_msix(struct em_queue *que)
4009 {
4010 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
4011 }
4012 #endif /* !SMALL_KERNEL */
4013 
4014 int
4015 em_allocate_desc_rings(struct em_softc *sc)
4016 {
4017 	struct em_queue *que;
4018 
4019 	FOREACH_QUEUE(sc, que) {
4020 		/* Allocate Transmit Descriptor ring */
4021 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4022 		    &que->tx.sc_tx_dma) != 0) {
4023 			printf("%s: Unable to allocate tx_desc memory\n",
4024 			    DEVNAME(sc));
4025 			return (ENOMEM);
4026 		}
4027 		que->tx.sc_tx_desc_ring =
4028 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4029 
4030 		/* Allocate Receive Descriptor ring */
4031 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4032 		    &que->rx.sc_rx_dma) != 0) {
4033 			printf("%s: Unable to allocate rx_desc memory\n",
4034 			    DEVNAME(sc));
4035 			return (ENOMEM);
4036 		}
4037 		que->rx.sc_rx_desc_ring =
4038 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4039 	}
4040 
4041 	return (0);
4042 }
4043 
4044 int
4045 em_get_sffpage(struct em_softc *sc, struct if_sffpage *sff)
4046 {
4047 	struct em_hw *hw = &sc->hw;
4048 	size_t i;
4049 	int off;
4050 
4051 	if (hw->mac_type != em_82575 && hw->mac_type != em_82580 &&
4052 	    hw->mac_type != em_82576 &&
4053 	    hw->mac_type != em_i210 && hw->mac_type != em_i350)
4054 		return (ENODEV);
4055 
4056 	if (sff->sff_addr == IFSFF_ADDR_EEPROM)
4057 		off = E1000_I2CCMD_SFP_DATA_ADDR(0);
4058 	else if (sff->sff_addr == IFSFF_ADDR_DDM)
4059 		off = E1000_I2CCMD_SFP_DIAG_ADDR(0);
4060 	else
4061 		return (EIO);
4062 
4063 	for (i = 0; i < sizeof(sff->sff_data); i++) {
4064 		if (em_read_sfp_data_byte(hw, off + i,
4065 		    &sff->sff_data[i]) != E1000_SUCCESS)
4066 			return (EIO);
4067 	}
4068 
4069 	return (0);
4070 }
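/*
 * em_get_sffpage() backs the SIOCGIFSFFPAGE ioctl that tools such as
 * ifconfig(8) use to dump SFP module pages.  A minimal userland sketch of
 * a caller follows, assuming struct if_sffpage and IFSFF_ADDR_EEPROM from
 * <net/if.h> and SIOCGIFSFFPAGE from <sys/sockio.h>:
 *
 *	struct if_sffpage sff;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&sff, 0, sizeof(sff));
 *	strlcpy(sff.sff_ifname, "em0", sizeof(sff.sff_ifname));
 *	sff.sff_addr = IFSFF_ADDR_EEPROM;
 *	sff.sff_page = 0;
 *	if (ioctl(s, SIOCGIFSFFPAGE, &sff) == -1)
 *		err(1, "SIOCGIFSFFPAGE");
 *	-- sff.sff_data now holds the 256-byte module EEPROM page --
 */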
4071