1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.362 2022/06/23 09:38:28 jsg Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 /*********************************************************************
41  *  Driver version
42  *********************************************************************/
43 
44 #define EM_DRIVER_VERSION	"6.2.9"
45 
46 /*********************************************************************
47  *  PCI Device ID Table
48  *********************************************************************/
49 const struct pci_matchid em_devices[] = {
50 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
225 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
226 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
227 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
228 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
229 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
230 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
231 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
232 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
233 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
234 };
235 
236 /*********************************************************************
237  *  Function prototypes
238  *********************************************************************/
239 int  em_probe(struct device *, void *, void *);
240 void em_attach(struct device *, struct device *, void *);
241 void em_defer_attach(struct device*);
242 int  em_detach(struct device *, int);
243 int  em_activate(struct device *, int);
244 int  em_intr(void *);
245 int  em_allocate_legacy(struct em_softc *);
246 void em_start(struct ifqueue *);
247 int  em_ioctl(struct ifnet *, u_long, caddr_t);
248 void em_watchdog(struct ifnet *);
249 void em_init(void *);
250 void em_stop(void *, int);
251 void em_media_status(struct ifnet *, struct ifmediareq *);
252 int  em_media_change(struct ifnet *);
253 uint64_t  em_flowstatus(struct em_softc *);
254 void em_identify_hardware(struct em_softc *);
255 int  em_allocate_pci_resources(struct em_softc *);
256 void em_free_pci_resources(struct em_softc *);
257 void em_local_timer(void *);
258 int  em_hardware_init(struct em_softc *);
259 void em_setup_interface(struct em_softc *);
260 int  em_setup_transmit_structures(struct em_softc *);
261 void em_initialize_transmit_unit(struct em_softc *);
262 int  em_setup_receive_structures(struct em_softc *);
263 void em_initialize_receive_unit(struct em_softc *);
264 void em_enable_intr(struct em_softc *);
265 void em_disable_intr(struct em_softc *);
266 void em_free_transmit_structures(struct em_softc *);
267 void em_free_receive_structures(struct em_softc *);
268 void em_update_stats_counters(struct em_softc *);
269 void em_disable_aspm(struct em_softc *);
270 void em_txeof(struct em_queue *);
271 int  em_allocate_receive_structures(struct em_softc *);
272 int  em_allocate_transmit_structures(struct em_softc *);
273 int  em_allocate_desc_rings(struct em_softc *);
274 int  em_rxfill(struct em_queue *);
275 void em_rxrefill(void *);
276 int  em_rxeof(struct em_queue *);
277 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
278 			 struct mbuf *);
279 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
280 	    u_int32_t *, u_int32_t *);
281 void em_iff(struct em_softc *);
282 void em_update_link_status(struct em_softc *);
283 int  em_get_buf(struct em_queue *, int);
284 void em_enable_hw_vlans(struct em_softc *);
285 u_int em_encap(struct em_queue *, struct mbuf *);
286 void em_smartspeed(struct em_softc *);
287 int  em_82547_fifo_workaround(struct em_softc *, int);
288 void em_82547_update_fifo_head(struct em_softc *, int);
289 int  em_82547_tx_fifo_reset(struct em_softc *);
290 void em_82547_move_tail(void *arg);
291 void em_82547_move_tail_locked(struct em_softc *);
292 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
293 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
294 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
295 			      PDESC_ARRAY desc_array);
296 void em_flush_tx_ring(struct em_queue *);
297 void em_flush_rx_ring(struct em_queue *);
298 void em_flush_desc_rings(struct em_softc *);
299 int em_get_sffpage(struct em_softc *, struct if_sffpage *);
300 
301 #ifndef SMALL_KERNEL
302 /* MSIX/Multiqueue functions */
303 int  em_allocate_msix(struct em_softc *);
304 int  em_setup_queues_msix(struct em_softc *);
305 int  em_queue_intr_msix(void *);
306 int  em_link_intr_msix(void *);
307 void em_enable_queue_intr_msix(struct em_queue *);
308 #else
309 #define em_allocate_msix(_sc) 	(-1)
310 #endif
311 
312 #if NKSTAT > 0
313 void	em_kstat_attach(struct em_softc *);
314 int	em_kstat_read(struct kstat *);
315 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
316 #endif
317 
318 /*********************************************************************
319  *  OpenBSD Device Interface Entry Points
320  *********************************************************************/
321 
322 const struct cfattach em_ca = {
323 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
324 	em_activate
325 };
326 
327 struct cfdriver em_cd = {
328 	NULL, "em", DV_IFNET
329 };
330 
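/* Driver-wide tunables, fixed at compile time: PHY smart power-down and MSI-X use. */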
331 static int em_smart_pwr_down = FALSE;
332 int em_enable_msix = 0;
333 
334 /*********************************************************************
335  *  Device identification routine
336  *
337  *  em_probe determines whether the driver should be loaded for an
338  *  adapter, based on the adapter's PCI vendor and device IDs.
339  *
340  *  return 0 on no match, positive on match
341  *********************************************************************/
342 
343 int
344 em_probe(struct device *parent, void *match, void *aux)
345 {
346 	INIT_DEBUGOUT("em_probe: begin");
347 
348 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
349 	    nitems(em_devices)));
350 }
351 
352 void
353 em_defer_attach(struct device *self)
354 {
355 	struct em_softc *sc = (struct em_softc *)self;
356 	struct pci_attach_args *pa = &sc->osdep.em_pa;
357 	pci_chipset_tag_t	pc = pa->pa_pc;
358 	void *gcu;
359 
360 	INIT_DEBUGOUT("em_defer_attach: begin");
361 
362 	if ((gcu = em_lookup_gcu(self)) == 0) {
363 		printf("%s: No GCU found, deferred attachment failed\n",
364 		    DEVNAME(sc));
365 
366 		if (sc->sc_intrhand)
367 			pci_intr_disestablish(pc, sc->sc_intrhand);
368 		sc->sc_intrhand = 0;
369 
370 		em_stop(sc, 1);
371 
372 		em_free_pci_resources(sc);
373 
374 		return;
375 	}
376 
377 	sc->hw.gcu = gcu;
378 
379 	em_attach_miibus(self);
380 
381 	em_setup_interface(sc);
382 
383 	em_setup_link(&sc->hw);
384 
385 	em_update_link_status(sc);
386 }
387 
388 /*********************************************************************
389  *  Device initialization routine
390  *
391  *  The attach entry point is called when the driver is being loaded.
392  *  This routine identifies the type of hardware, allocates all resources
393  *  and initializes the hardware.
394  *
395  *********************************************************************/
396 
397 void
398 em_attach(struct device *parent, struct device *self, void *aux)
399 {
400 	struct pci_attach_args *pa = aux;
401 	struct em_softc *sc;
402 	int defer = 0;
403 
404 	INIT_DEBUGOUT("em_attach: begin");
405 
406 	sc = (struct em_softc *)self;
407 	sc->sc_dmat = pa->pa_dmat;
408 	sc->osdep.em_pa = *pa;
409 
410 	timeout_set(&sc->timer_handle, em_local_timer, sc);
411 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
412 
413 	rw_init(&sc->sfflock, "emsff");
414 
415 	/* Determine hardware revision */
416 	em_identify_hardware(sc);
417 
418 	/*
419 	 * Only use MSI on the newer PCIe parts, with the exception
420 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
421 	 */
422 	if (sc->hw.mac_type <= em_82572)
423 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
424 
425 	/* Parameters (to be read from user) */
426 	if (sc->hw.mac_type >= em_82544) {
427 		sc->sc_tx_slots = EM_MAX_TXD;
428 		sc->sc_rx_slots = EM_MAX_RXD;
429 	} else {
430 		sc->sc_tx_slots = EM_MAX_TXD_82543;
431 		sc->sc_rx_slots = EM_MAX_RXD_82543;
432 	}
433 	sc->tx_int_delay = EM_TIDV;
434 	sc->tx_abs_int_delay = EM_TADV;
435 	sc->rx_int_delay = EM_RDTR;
436 	sc->rx_abs_int_delay = EM_RADV;
437 	sc->hw.autoneg = DO_AUTO_NEG;
438 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
439 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
440 	sc->hw.tbi_compatibility_en = TRUE;
441 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
442 
443 	sc->hw.phy_init_script = 1;
444 	sc->hw.phy_reset_disable = FALSE;
445 
446 #ifndef EM_MASTER_SLAVE
447 	sc->hw.master_slave = em_ms_hw_default;
448 #else
449 	sc->hw.master_slave = EM_MASTER_SLAVE;
450 #endif
451 
452 	/*
453 	 * This controls when hardware reports transmit completion
454 	 * status.
455 	 */
456 	sc->hw.report_tx_early = 1;
457 
458 	if (em_allocate_pci_resources(sc))
459 		goto err_pci;
460 
461 	/* Initialize eeprom parameters */
462 	em_init_eeprom_params(&sc->hw);
463 
464 	/*
465 	 * Set the max frame size assuming standard Ethernet
466 	 * sized frames.
467 	 */
468 	switch (sc->hw.mac_type) {
469 		case em_82573:
470 		{
471 			uint16_t	eeprom_data = 0;
472 
473 			/*
474 			 * 82573 only supports Jumbo frames
475 			 * if ASPM is disabled.
476 			 */
477 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
478 			    1, &eeprom_data);
479 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
480 				sc->hw.max_frame_size = ETHER_MAX_LEN;
481 				break;
482 			}
483 			/* Allow Jumbo frames */
484 			/* FALLTHROUGH */
485 		}
486 		case em_82571:
487 		case em_82572:
488 		case em_82574:
489 		case em_82575:
490 		case em_82576:
491 		case em_82580:
492 		case em_i210:
493 		case em_i350:
494 		case em_ich9lan:
495 		case em_ich10lan:
496 		case em_pch2lan:
497 		case em_pch_lpt:
498 		case em_pch_spt:
499 		case em_pch_cnp:
500 		case em_pch_tgp:
501 		case em_pch_adp:
502 		case em_80003es2lan:
503 			/* 9K Jumbo Frame size */
504 			sc->hw.max_frame_size = 9234;
505 			break;
506 		case em_pchlan:
507 			sc->hw.max_frame_size = 4096;
508 			break;
509 		case em_82542_rev2_0:
510 		case em_82542_rev2_1:
511 		case em_ich8lan:
512 			/* Adapters that do not support Jumbo frames */
513 			sc->hw.max_frame_size = ETHER_MAX_LEN;
514 			break;
515 		default:
516 			sc->hw.max_frame_size =
517 			    MAX_JUMBO_FRAME_SIZE;
518 	}
519 
520 	sc->hw.min_frame_size =
521 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
522 
523 	if (em_allocate_desc_rings(sc) != 0) {
524 		printf("%s: Unable to allocate descriptor ring memory\n",
525 		    DEVNAME(sc));
526 		goto err_pci;
527 	}
528 
529 	/* Initialize the hardware */
530 	if ((defer = em_hardware_init(sc))) {
531 		if (defer == EAGAIN)
532 			config_defer(self, em_defer_attach);
533 		else {
534 			printf("%s: Unable to initialize the hardware\n",
535 			    DEVNAME(sc));
536 			goto err_pci;
537 		}
538 	}
539 
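	/*
	 * Multi-function parts arbitrate PHY access through a software/
	 * firmware semaphore; pick the semaphore bit that matches this
	 * PCI function.
	 */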
540 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
541 	    sc->hw.mac_type == em_82576 ||
542 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
543 	    sc->hw.mac_type == em_i350) {
544 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
545 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
546 		    E1000_STATUS_FUNC_SHIFT;
547 
548 		switch (sc->hw.bus_func) {
549 		case 0:
550 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
551 			break;
552 		case 1:
553 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
554 			break;
555 		case 2:
556 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
557 			break;
558 		case 3:
559 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
560 			break;
561 		}
562 	} else {
563 		sc->hw.bus_func = 0;
564 	}
565 
566 	/* Copy the permanent MAC address out of the EEPROM */
567 	if (em_read_mac_addr(&sc->hw) < 0) {
568 		printf("%s: EEPROM read error while reading mac address\n",
569 		       DEVNAME(sc));
570 		goto err_pci;
571 	}
572 
573 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
574 
575 	/* Setup OS specific network interface */
576 	if (!defer)
577 		em_setup_interface(sc);
578 
579 	/* Initialize statistics */
580 	em_clear_hw_cntrs(&sc->hw);
581 #if NKSTAT > 0
582 	em_kstat_attach(sc);
583 #endif
584 	sc->hw.get_link_status = 1;
585 	if (!defer)
586 		em_update_link_status(sc);
587 
588 #ifdef EM_DEBUG
589 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
590 #endif
591 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
592 
593 	/* Indicate SOL/IDER usage */
594 	if (em_check_phy_reset_block(&sc->hw))
595 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
596 		    DEVNAME(sc));
597 
598 	/* Identify 82544 on PCI-X */
599 	em_get_bus_info(&sc->hw);
600 	if (sc->hw.bus_type == em_bus_type_pcix &&
601 	    sc->hw.mac_type == em_82544)
602 		sc->pcix_82544 = TRUE;
603 	else
604 		sc->pcix_82544 = FALSE;
605 
606 	sc->hw.icp_xxxx_is_link_up = FALSE;
607 
608 	INIT_DEBUGOUT("em_attach: end");
609 	return;
610 
611 err_pci:
612 	em_free_pci_resources(sc);
613 }
614 
615 /*********************************************************************
616  *  Transmit entry point
617  *
618  *  em_start is called by the stack to initiate a transmit.
619  *  The driver will remain in this routine as long as there are
620  *  packets to transmit and transmit resources are available.
621  *  If transmit resources are not available, the stack is notified
622  *  and the packet is requeued.
623  **********************************************************************/
624 
625 void
626 em_start(struct ifqueue *ifq)
627 {
628 	struct ifnet *ifp = ifq->ifq_if;
629 	struct em_softc *sc = ifp->if_softc;
630 	u_int head, free, used;
631 	struct mbuf *m;
632 	int post = 0;
633 	struct em_queue *que = sc->queues; /* Use only first queue. */
634 
635 	if (!sc->link_active) {
636 		ifq_purge(ifq);
637 		return;
638 	}
639 
640 	/* calculate free space */
641 	head = que->tx.sc_tx_desc_head;
642 	free = que->tx.sc_tx_desc_tail;
643 	if (free <= head)
644 		free += sc->sc_tx_slots;
645 	free -= head;
646 
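	/*
	 * The 82547 syncs the descriptor ring per packet in em_encap();
	 * all other chips sync the whole ring once around the dequeue loop.
	 */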
647 	if (sc->hw.mac_type != em_82547) {
648 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
649 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
650 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
651 	}
652 
653 	for (;;) {
654 		/* use 2 because cksum setup can use an extra slot */
655 		if (EM_MAX_SCATTER + 2 > free) {
656 			ifq_set_oactive(ifq);
657 			break;
658 		}
659 
660 		m = ifq_dequeue(ifq);
661 		if (m == NULL)
662 			break;
663 
664 		used = em_encap(que, m);
665 		if (used == 0) {
666 			m_freem(m);
667 			continue;
668 		}
669 
670 		KASSERT(used <= free);
671 
672 		free -= used;
673 
674 #if NBPFILTER > 0
675 		/* Send a copy of the frame to the BPF listener */
676 		if (ifp->if_bpf)
677 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
678 #endif
679 
680 		/* Set timeout in case hardware has problems transmitting */
681 		ifp->if_timer = EM_TX_TIMEOUT;
682 
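		/*
		 * On the 82547, descriptors are released to the hardware
		 * through the Tx FIFO workaround when running half duplex;
		 * otherwise the tail is written directly and the FIFO head
		 * accounting is updated.
		 */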
683 		if (sc->hw.mac_type == em_82547) {
684 			int len = m->m_pkthdr.len;
685 
686 			if (sc->link_duplex == HALF_DUPLEX)
687 				em_82547_move_tail_locked(sc);
688 			else {
689 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
690 				    que->tx.sc_tx_desc_head);
691 				em_82547_update_fifo_head(sc, len);
692 			}
693 		}
694 
695 		post = 1;
696 	}
697 
698 	if (sc->hw.mac_type != em_82547) {
699 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
700 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
701 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
702 		/*
703 		 * Advance the Transmit Descriptor Tail (TDT);
704 		 * this tells the E1000 that the queued frames are
705 		 * available to transmit.
706 		 */
707 		if (post)
708 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
709 			    que->tx.sc_tx_desc_head);
710 	}
711 }
712 
713 /*********************************************************************
714  *  Ioctl entry point
715  *
716  *  em_ioctl is called when the user wants to configure the
717  *  interface.
718  *
719  *  return 0 on success, positive on failure
720  **********************************************************************/
721 
722 int
723 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
724 {
725 	int		error = 0;
726 	struct ifreq   *ifr = (struct ifreq *) data;
727 	struct em_softc *sc = ifp->if_softc;
728 	int s;
729 
730 	s = splnet();
731 
732 	switch (command) {
733 	case SIOCSIFADDR:
734 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
735 			       "Addr)");
736 		if (!(ifp->if_flags & IFF_UP)) {
737 			ifp->if_flags |= IFF_UP;
738 			em_init(sc);
739 		}
740 		break;
741 
742 	case SIOCSIFFLAGS:
743 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
744 		if (ifp->if_flags & IFF_UP) {
745 			if (ifp->if_flags & IFF_RUNNING)
746 				error = ENETRESET;
747 			else
748 				em_init(sc);
749 		} else {
750 			if (ifp->if_flags & IFF_RUNNING)
751 				em_stop(sc, 0);
752 		}
753 		break;
754 
755 	case SIOCSIFMEDIA:
756 		/* Check SOL/IDER usage */
757 		if (em_check_phy_reset_block(&sc->hw)) {
758 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
759 			    DEVNAME(sc));
760 			break;
761 		}
762 	case SIOCGIFMEDIA:
763 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
764 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
765 		break;
766 
767 	case SIOCGIFRXR:
768 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
769 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
770 		break;
771 
772 	case SIOCGIFSFFPAGE:
773 		error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
774 		if (error != 0)
775 			break;
776 
777 		error = em_get_sffpage(sc, (struct if_sffpage *)data);
778 		rw_exit(&sc->sfflock);
779 		break;
780 
781 	default:
782 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
783 	}
784 
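	/*
	 * A configuration change (interface flags or multicast list) on a
	 * running interface only requires the receive filter to be
	 * reprogrammed; the 82542 rev 2.0 also needs its receive unit
	 * reinitialized.
	 */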
785 	if (error == ENETRESET) {
786 		if (ifp->if_flags & IFF_RUNNING) {
787 			em_disable_intr(sc);
788 			em_iff(sc);
789 			if (sc->hw.mac_type == em_82542_rev2_0)
790 				em_initialize_receive_unit(sc);
791 			em_enable_intr(sc);
792 		}
793 		error = 0;
794 	}
795 
796 	splx(s);
797 	return (error);
798 }
799 
800 /*********************************************************************
801  *  Watchdog entry point
802  *
803  *  This routine is called whenever the hardware stops transmitting.
804  *
805  **********************************************************************/
806 
807 void
808 em_watchdog(struct ifnet *ifp)
809 {
810 	struct em_softc *sc = ifp->if_softc;
811 	struct em_queue *que = sc->queues; /* Use only first queue. */
812 
813 
814 	/* If we are in this routine because of pause frames, then
815 	 * don't reset the hardware.
816 	 */
817 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
818 		ifp->if_timer = EM_TX_TIMEOUT;
819 		return;
820 	}
821 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
822 	    DEVNAME(sc),
823 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
824 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
825 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
826 
827 	em_init(sc);
828 
829 	sc->watchdog_events++;
830 }
831 
832 /*********************************************************************
833  *  Init entry point
834  *
835  *  This routine is used in two ways. It is used by the stack as the
836  *  init entry point in the network interface structure. It is also used
837  *  by the driver as a hw/sw initialization routine to get to a
838  *  consistent state.
839  *
840  **********************************************************************/
841 
842 void
843 em_init(void *arg)
844 {
845 	struct em_softc *sc = arg;
846 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
847 	uint32_t	pba;
848 	int s;
849 
850 	s = splnet();
851 
852 	INIT_DEBUGOUT("em_init: begin");
853 
854 	em_stop(sc, 0);
855 
856 	/*
857 	 * Packet Buffer Allocation (PBA)
858 	 * Writing PBA sets the receive portion of the buffer;
859 	 * the remainder is used for the transmit buffer.
860 	 *
861 	 * Devices before the 82547 had a Packet Buffer of 64K.
862 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
863 	 * Beginning with the 82547 the buffer was reduced to 40K.
864 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
865 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
866 	 */
867 	switch (sc->hw.mac_type) {
868 	case em_82547:
869 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
870 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
871 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
872 		else
873 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
874 		sc->tx_fifo_head = 0;
875 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
876 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
877 		break;
878 	case em_82571:
879 	case em_82572: /* Total Packet Buffer on these is 48k */
880 	case em_82575:
881 	case em_82576:
882 	case em_82580:
883 	case em_80003es2lan:
884 	case em_i350:
885 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
886 		break;
887 	case em_i210:
888 		pba = E1000_PBA_34K;
889 		break;
890 	case em_82573: /* 82573: Total Packet Buffer is 32K */
891 		/* Jumbo frames not supported */
892 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
893 		break;
894 	case em_82574: /* Total Packet Buffer is 40k */
895 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
896 		break;
897 	case em_ich8lan:
898 		pba = E1000_PBA_8K;
899 		break;
900 	case em_ich9lan:
901 	case em_ich10lan:
902 		/* Boost Receive side for jumbo frames */
903 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
904 			pba = E1000_PBA_14K;
905 		else
906 			pba = E1000_PBA_10K;
907 		break;
908 	case em_pchlan:
909 	case em_pch2lan:
910 	case em_pch_lpt:
911 	case em_pch_spt:
912 	case em_pch_cnp:
913 	case em_pch_tgp:
914 	case em_pch_adp:
915 		pba = E1000_PBA_26K;
916 		break;
917 	default:
918 		/* Devices before 82547 had a Packet Buffer of 64K.   */
919 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
920 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
921 		else
922 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
923 	}
924 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
925 	E1000_WRITE_REG(&sc->hw, PBA, pba);
926 
927 	/* Get the latest MAC address; the user may have configured a LAA */
928 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
929 
930 	/* Initialize the hardware */
931 	if (em_hardware_init(sc)) {
932 		printf("%s: Unable to initialize the hardware\n",
933 		       DEVNAME(sc));
934 		splx(s);
935 		return;
936 	}
937 	em_update_link_status(sc);
938 
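	/* Program the VLAN Ether Type register so 802.1Q tags are recognized */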
939 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
940 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
941 		em_enable_hw_vlans(sc);
942 
943 	/* Prepare transmit descriptors and buffers */
944 	if (em_setup_transmit_structures(sc)) {
945 		printf("%s: Could not setup transmit structures\n",
946 		       DEVNAME(sc));
947 		em_stop(sc, 0);
948 		splx(s);
949 		return;
950 	}
951 	em_initialize_transmit_unit(sc);
952 
953 	/* Prepare receive descriptors and buffers */
954 	if (em_setup_receive_structures(sc)) {
955 		printf("%s: Could not setup receive structures\n",
956 		       DEVNAME(sc));
957 		em_stop(sc, 0);
958 		splx(s);
959 		return;
960 	}
961 	em_initialize_receive_unit(sc);
962 
963 #ifndef SMALL_KERNEL
964 	if (sc->msix) {
965 		if (em_setup_queues_msix(sc)) {
966 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
967 			splx(s);
968 			return;
969 		}
970 	}
971 #endif
972 
973 	/* Program promiscuous mode and multicast filters. */
974 	em_iff(sc);
975 
976 	ifp->if_flags |= IFF_RUNNING;
977 	ifq_clr_oactive(&ifp->if_snd);
978 
979 	timeout_add_sec(&sc->timer_handle, 1);
980 	em_clear_hw_cntrs(&sc->hw);
981 	em_enable_intr(sc);
982 
983 	/* Don't reset the phy next time init gets called */
984 	sc->hw.phy_reset_disable = TRUE;
985 
986 	splx(s);
987 }
988 
989 /*********************************************************************
990  *
991  *  Interrupt Service routine
992  *
993  **********************************************************************/
994 int
995 em_intr(void *arg)
996 {
997 	struct em_softc	*sc = arg;
998 	struct em_queue *que = sc->queues; /* single queue */
999 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
1000 	u_int32_t	reg_icr, test_icr;
1001 
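	/*
	 * Reading ICR returns and clears the pending interrupt causes.
	 * On 82571 and newer, the INT_ASSERTED bit tells us whether this
	 * (possibly shared) interrupt was actually raised by this device.
	 */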
1002 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
1003 	if (sc->hw.mac_type >= em_82571)
1004 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
1005 	if (!test_icr)
1006 		return (0);
1007 
1008 	if (ifp->if_flags & IFF_RUNNING) {
1009 		em_txeof(que);
1010 		if (em_rxeof(que))
1011 			em_rxrefill(que);
1012 	}
1013 
1014 	/* Link status change */
1015 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1016 		KERNEL_LOCK();
1017 		sc->hw.get_link_status = 1;
1018 		em_check_for_link(&sc->hw);
1019 		em_update_link_status(sc);
1020 		KERNEL_UNLOCK();
1021 	}
1022 
1023 	return (1);
1024 }
1025 
1026 /*********************************************************************
1027  *
1028  *  Media Ioctl callback
1029  *
1030  *  This routine is called whenever the user queries the status of
1031  *  the interface using ifconfig.
1032  *
1033  **********************************************************************/
1034 void
1035 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1036 {
1037 	struct em_softc *sc = ifp->if_softc;
1038 	uint64_t fiber_type = IFM_1000_SX;
1039 	u_int16_t gsr;
1040 
1041 	INIT_DEBUGOUT("em_media_status: begin");
1042 
1043 	em_check_for_link(&sc->hw);
1044 	em_update_link_status(sc);
1045 
1046 	ifmr->ifm_status = IFM_AVALID;
1047 	ifmr->ifm_active = IFM_ETHER;
1048 
1049 	if (!sc->link_active) {
1050 		ifmr->ifm_active |= IFM_NONE;
1051 		return;
1052 	}
1053 
1054 	ifmr->ifm_status |= IFM_ACTIVE;
1055 
1056 	if (sc->hw.media_type == em_media_type_fiber ||
1057 	    sc->hw.media_type == em_media_type_internal_serdes) {
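		/*
		 * Fiber and serdes links are reported as 1000 Mb/s full
		 * duplex; the 82545 fiber variant is reported as
		 * 1000BASE-LX, everything else as 1000BASE-SX.
		 */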
1058 		if (sc->hw.mac_type == em_82545)
1059 			fiber_type = IFM_1000_LX;
1060 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1061 	} else {
1062 		switch (sc->link_speed) {
1063 		case 10:
1064 			ifmr->ifm_active |= IFM_10_T;
1065 			break;
1066 		case 100:
1067 			ifmr->ifm_active |= IFM_100_TX;
1068 			break;
1069 		case 1000:
1070 			ifmr->ifm_active |= IFM_1000_T;
1071 			break;
1072 		}
1073 
1074 		if (sc->link_duplex == FULL_DUPLEX)
1075 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1076 		else
1077 			ifmr->ifm_active |= IFM_HDX;
1078 
1079 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1080 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1081 			if (gsr & SR_1000T_MS_CONFIG_RES)
1082 				ifmr->ifm_active |= IFM_ETH_MASTER;
1083 		}
1084 	}
1085 }
1086 
1087 /*********************************************************************
1088  *
1089  *  Media Ioctl callback
1090  *
1091  *  This routine is called when the user changes speed/duplex using
1092  *  the media/mediaopt options with ifconfig.
1093  *
1094  **********************************************************************/
1095 int
1096 em_media_change(struct ifnet *ifp)
1097 {
1098 	struct em_softc *sc = ifp->if_softc;
1099 	struct ifmedia	*ifm = &sc->media;
1100 
1101 	INIT_DEBUGOUT("em_media_change: begin");
1102 
1103 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1104 		return (EINVAL);
1105 
1106 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1107 	case IFM_AUTO:
1108 		sc->hw.autoneg = DO_AUTO_NEG;
1109 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1110 		break;
1111 	case IFM_1000_LX:
1112 	case IFM_1000_SX:
1113 	case IFM_1000_T:
1114 		sc->hw.autoneg = DO_AUTO_NEG;
1115 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1116 		break;
1117 	case IFM_100_TX:
1118 		sc->hw.autoneg = FALSE;
1119 		sc->hw.autoneg_advertised = 0;
1120 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1121 			sc->hw.forced_speed_duplex = em_100_full;
1122 		else
1123 			sc->hw.forced_speed_duplex = em_100_half;
1124 		break;
1125 	case IFM_10_T:
1126 		sc->hw.autoneg = FALSE;
1127 		sc->hw.autoneg_advertised = 0;
1128 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1129 			sc->hw.forced_speed_duplex = em_10_full;
1130 		else
1131 			sc->hw.forced_speed_duplex = em_10_half;
1132 		break;
1133 	default:
1134 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1135 	}
1136 
1137 	/*
1138 	 * As the speed/duplex settings may have changed we need to
1139 	 * reset the PHY.
1140 	 */
1141 	sc->hw.phy_reset_disable = FALSE;
1142 
1143 	em_init(sc);
1144 
1145 	return (0);
1146 }
1147 
1148 uint64_t
1149 em_flowstatus(struct em_softc *sc)
1150 {
1151 	u_int16_t ar, lpar;
1152 
1153 	if (sc->hw.media_type == em_media_type_fiber ||
1154 	    sc->hw.media_type == em_media_type_internal_serdes)
1155 		return (0);
1156 
1157 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1158 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1159 
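	/*
	 * Resolve flow control from the local and link partner
	 * autonegotiation registers (IEEE 802.3 Annex 28B): symmetric
	 * PAUSE enables both directions, otherwise ASM_DIR selects
	 * Tx-only or Rx-only pause.
	 */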
1160 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1161 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1162 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1163 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1164 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1165 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1166 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1167 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1168 
1169 	return (0);
1170 }
1171 
1172 /*********************************************************************
1173  *
1174  *  This routine maps the mbufs to tx descriptors.
1175  *
1176  *  return the number of descriptors used on success, 0 on failure
1177  **********************************************************************/
1178 u_int
1179 em_encap(struct em_queue *que, struct mbuf *m)
1180 {
1181 	struct em_softc *sc = que->sc;
1182 	struct em_packet *pkt;
1183 	struct em_tx_desc *desc;
1184 	bus_dmamap_t map;
1185 	u_int32_t txd_upper, txd_lower;
1186 	u_int head, last, used = 0;
1187 	int i, j;
1188 
1189 	/* For 82544 Workaround */
1190 	DESC_ARRAY		desc_array;
1191 	u_int32_t		array_elements;
1192 
1193 	/* get a dmamap for this packet from the next free slot */
1194 	head = que->tx.sc_tx_desc_head;
1195 	pkt = &que->tx.sc_tx_pkts_ring[head];
1196 	map = pkt->pkt_map;
1197 
1198 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1199 	case 0:
1200 		break;
1201 	case EFBIG:
1202 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1203 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1204 		     BUS_DMA_NOWAIT) == 0)
1205 			break;
1206 
1207 		/* FALLTHROUGH */
1208 	default:
1209 		sc->no_tx_dma_setup++;
1210 		return (0);
1211 	}
1212 
1213 	bus_dmamap_sync(sc->sc_dmat, map,
1214 	    0, map->dm_mapsize,
1215 	    BUS_DMASYNC_PREWRITE);
1216 
1217 	if (sc->hw.mac_type == em_82547) {
1218 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1219 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1220 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1221 	}
1222 
1223 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1224 	    sc->hw.mac_type != em_82576 &&
1225 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1226 	    sc->hw.mac_type != em_i350) {
1227 		used += em_transmit_checksum_setup(que, m, head,
1228 		    &txd_upper, &txd_lower);
1229 	} else {
1230 		txd_upper = txd_lower = 0;
1231 	}
1232 
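	/*
	 * Account for any context descriptor consumed by the checksum
	 * setup before placing the data descriptors.
	 */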
1233 	head += used;
1234 	if (head >= sc->sc_tx_slots)
1235 		head -= sc->sc_tx_slots;
1236 
1237 	for (i = 0; i < map->dm_nsegs; i++) {
1238 		/* If sc is 82544 and on PCI-X bus */
1239 		if (sc->pcix_82544) {
1240 			/*
1241 			 * Check the Address and Length combination and
1242 			 * split the data accordingly
1243 			 */
1244 			array_elements = em_fill_descriptors(
1245 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1246 			    &desc_array);
1247 			for (j = 0; j < array_elements; j++) {
1248 				desc = &que->tx.sc_tx_desc_ring[head];
1249 
1250 				desc->buffer_addr = htole64(
1251 					desc_array.descriptor[j].address);
1252 				desc->lower.data = htole32(
1253 					(que->tx.sc_txd_cmd | txd_lower |
1254 					 (u_int16_t)desc_array.descriptor[j].length));
1255 				desc->upper.data = htole32(txd_upper);
1256 
1257 				last = head;
1258 				if (++head == sc->sc_tx_slots)
1259 					head = 0;
1260 
1261 				used++;
1262 			}
1263 		} else {
1264 			desc = &que->tx.sc_tx_desc_ring[head];
1265 
1266 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1267 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1268 			    txd_lower | map->dm_segs[i].ds_len);
1269 			desc->upper.data = htole32(txd_upper);
1270 
1271 			last = head;
1272 			if (++head == sc->sc_tx_slots)
1273 				head = 0;
1274 
1275 			used++;
1276 		}
1277 	}
1278 
1279 #if NVLAN > 0
1280 	/* If the frame carries a VLAN tag, have the hardware insert it */
1281 	if (m->m_flags & M_VLANTAG) {
1282 		/* Set the VLAN id */
1283 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1284 
1285 		/* Tell hardware to add tag */
1286 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1287 	}
1288 #endif
1289 
1290 	/* mark the packet with the mbuf and last desc slot */
1291 	pkt->pkt_m = m;
1292 	pkt->pkt_eop = last;
1293 
1294 	que->tx.sc_tx_desc_head = head;
1295 
1296 	/*
1297 	 * Last Descriptor of Packet
1298 	 * needs End Of Packet (EOP)
1299 	 * and Report Status (RS)
1300 	 */
1301 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1302 
1303 	if (sc->hw.mac_type == em_82547) {
1304 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1305 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1306 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1307 	}
1308 
1309 	return (used);
1310 }
1311 
1312 /*********************************************************************
1313  *
1314  * 82547 workaround to avoid controller hang in a half-duplex environment.
1315  * The workaround is to avoid queuing a large packet that would span
1316  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1317  * in that case, and we do so only when the FIFO is quiescent.
1318  *
1319  **********************************************************************/
1320 void
1321 em_82547_move_tail_locked(struct em_softc *sc)
1322 {
1323 	uint16_t hw_tdt;
1324 	uint16_t sw_tdt;
1325 	struct em_tx_desc *tx_desc;
1326 	uint16_t length = 0;
1327 	boolean_t eop = 0;
1328 	struct em_queue *que = sc->queues; /* single queue chip */
1329 
1330 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1331 	sw_tdt = que->tx.sc_tx_desc_head;
1332 
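	/*
	 * Walk the descriptors software has queued but not yet released
	 * to the hardware, handing them over one complete packet (EOP)
	 * at a time once the packet is known to fit in the Tx FIFO.
	 */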
1333 	while (hw_tdt != sw_tdt) {
1334 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1335 		length += tx_desc->lower.flags.length;
1336 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1337 		if (++hw_tdt == sc->sc_tx_slots)
1338 			hw_tdt = 0;
1339 
1340 		if (eop) {
1341 			if (em_82547_fifo_workaround(sc, length)) {
1342 				sc->tx_fifo_wrk_cnt++;
1343 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1344 				break;
1345 			}
1346 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1347 			em_82547_update_fifo_head(sc, length);
1348 			length = 0;
1349 		}
1350 	}
1351 }
1352 
1353 void
1354 em_82547_move_tail(void *arg)
1355 {
1356 	struct em_softc *sc = arg;
1357 	int s;
1358 
1359 	s = splnet();
1360 	em_82547_move_tail_locked(sc);
1361 	splx(s);
1362 }
1363 
1364 int
1365 em_82547_fifo_workaround(struct em_softc *sc, int len)
1366 {
1367 	int fifo_space, fifo_pkt_len;
1368 
1369 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
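	/*
	 * In half duplex, a packet that would wrap past the end of the
	 * on-chip Tx FIFO can hang the controller; report the problem
	 * (nonzero) unless the FIFO could be reset.
	 */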
1370 
1371 	if (sc->link_duplex == HALF_DUPLEX) {
1372 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1373 
1374 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1375 			if (em_82547_tx_fifo_reset(sc))
1376 				return (0);
1377 			else
1378 				return (1);
1379 		}
1380 	}
1381 
1382 	return (0);
1383 }
1384 
1385 void
1386 em_82547_update_fifo_head(struct em_softc *sc, int len)
1387 {
1388 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1389 
1390 	/* tx_fifo_head is always 16-byte aligned */
1391 	sc->tx_fifo_head += fifo_pkt_len;
1392 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1393 		sc->tx_fifo_head -= sc->tx_fifo_size;
1394 }
1395 
1396 int
1397 em_82547_tx_fifo_reset(struct em_softc *sc)
1398 {
1399 	uint32_t tctl;
1400 	struct em_queue *que = sc->queues; /* single queue chip */
1401 
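	/*
	 * The FIFO may only be reset while the transmitter is idle:
	 * descriptor ring empty (TDT == TDH), FIFO head and tail
	 * pointers equal, and no packet data buffered (TDFPC == 0).
	 */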
1402 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1403 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1404 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1405 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1406 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1407 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1408 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1409 
1410 		/* Disable TX unit */
1411 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1412 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1413 
1414 		/* Reset FIFO pointers */
1415 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1416 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1417 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1418 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1419 
1420 		/* Re-enable TX unit */
1421 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1422 		E1000_WRITE_FLUSH(&sc->hw);
1423 
1424 		sc->tx_fifo_head = 0;
1425 		sc->tx_fifo_reset_cnt++;
1426 
1427 		return (TRUE);
1428 	} else
1429 		return (FALSE);
1430 }
1431 
1432 void
1433 em_iff(struct em_softc *sc)
1434 {
1435 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1436 	struct arpcom *ac = &sc->sc_ac;
1437 	u_int32_t reg_rctl = 0;
1438 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1439 	struct ether_multi *enm;
1440 	struct ether_multistep step;
1441 	int i = 0;
1442 
1443 	IOCTL_DEBUGOUT("em_iff: begin");
1444 
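	/*
	 * The 82542 rev 2.0 must have its receiver held in reset (and
	 * MWI turned off) while the receive filter is rewritten.
	 */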
1445 	if (sc->hw.mac_type == em_82542_rev2_0) {
1446 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1447 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1448 			em_pci_clear_mwi(&sc->hw);
1449 		reg_rctl |= E1000_RCTL_RST;
1450 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1451 		msec_delay(5);
1452 	}
1453 
1454 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1455 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1456 	ifp->if_flags &= ~IFF_ALLMULTI;
1457 
1458 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1459 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1460 		ifp->if_flags |= IFF_ALLMULTI;
1461 		reg_rctl |= E1000_RCTL_MPE;
1462 		if (ifp->if_flags & IFF_PROMISC)
1463 			reg_rctl |= E1000_RCTL_UPE;
1464 	} else {
1465 		ETHER_FIRST_MULTI(step, ac, enm);
1466 		while (enm != NULL) {
1467 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1468 			i += ETH_LENGTH_OF_ADDRESS;
1469 
1470 			ETHER_NEXT_MULTI(step, enm);
1471 		}
1472 
1473 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1474 	}
1475 
1476 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1477 
1478 	if (sc->hw.mac_type == em_82542_rev2_0) {
1479 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1480 		reg_rctl &= ~E1000_RCTL_RST;
1481 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1482 		msec_delay(5);
1483 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1484 			em_pci_set_mwi(&sc->hw);
1485 	}
1486 }
1487 
1488 /*********************************************************************
1489  *  Timer routine
1490  *
1491  *  This routine checks for link status and updates statistics.
1492  *  This routine runs once a second to apply the SmartSpeed workaround and,
1493  *  when kstat support is built in, to refresh the statistics counters.
1493  **********************************************************************/
1494 
1495 void
1496 em_local_timer(void *arg)
1497 {
1498 	struct em_softc *sc = arg;
1499 	int s;
1500 
1501 	timeout_add_sec(&sc->timer_handle, 1);
1502 
1503 	s = splnet();
1504 	em_smartspeed(sc);
1505 	splx(s);
1506 
1507 #if NKSTAT > 0
1508 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1509 		em_kstat_read(sc->kstat);
1510 		mtx_leave(&sc->kstat_mtx);
1511 	}
1512 #endif
1513 }
1514 
1515 void
1516 em_update_link_status(struct em_softc *sc)
1517 {
1518 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1519 	u_char link_state;
1520 
1521 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1522 		if (sc->link_active == 0) {
1523 			em_get_speed_and_duplex(&sc->hw,
1524 						&sc->link_speed,
1525 						&sc->link_duplex);
1526 			/* Check if we may set SPEED_MODE bit on PCI-E */
1527 			if ((sc->link_speed == SPEED_1000) &&
1528 			    ((sc->hw.mac_type == em_82571) ||
1529 			    (sc->hw.mac_type == em_82572) ||
1530 			    (sc->hw.mac_type == em_82575) ||
1531 			    (sc->hw.mac_type == em_82576) ||
1532 			    (sc->hw.mac_type == em_82580))) {
1533 				int tarc0;
1534 
1535 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1536 				tarc0 |= SPEED_MODE_BIT;
1537 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1538 			}
1539 			sc->link_active = 1;
1540 			sc->smartspeed = 0;
1541 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1542 		}
1543 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1544 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1545 	} else {
1546 		if (sc->link_active == 1) {
1547 			ifp->if_baudrate = sc->link_speed = 0;
1548 			sc->link_duplex = 0;
1549 			sc->link_active = 0;
1550 		}
1551 		link_state = LINK_STATE_DOWN;
1552 	}
1553 	if (ifp->if_link_state != link_state) {
1554 		ifp->if_link_state = link_state;
1555 		if_link_state_change(ifp);
1556 	}
1557 }
1558 
1559 /*********************************************************************
1560  *
1561  *  This routine disables all traffic on the adapter by issuing a
1562  *  global reset on the MAC and deallocates TX/RX buffers.
1563  *
1564  **********************************************************************/
1565 
1566 void
1567 em_stop(void *arg, int softonly)
1568 {
1569 	struct em_softc *sc = arg;
1570 	struct em_queue *que = sc->queues; /* Use only first queue. */
1571 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1572 
1573 	/* Tell the stack that the interface is no longer active */
1574 	ifp->if_flags &= ~IFF_RUNNING;
1575 
1576 	INIT_DEBUGOUT("em_stop: begin");
1577 
1578 	timeout_del(&que->rx_refill);
1579 	timeout_del(&sc->timer_handle);
1580 	timeout_del(&sc->tx_fifo_timer_handle);
1581 
1582 	if (!softonly)
1583 		em_disable_intr(sc);
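	/*
	 * SPT and newer PCH parts can hang on reset if descriptors are
	 * still queued, so flush the descriptor rings first.
	 */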
1584 	if (sc->hw.mac_type >= em_pch_spt)
1585 		em_flush_desc_rings(sc);
1586 	if (!softonly)
1587 		em_reset_hw(&sc->hw);
1588 
1589 	intr_barrier(sc->sc_intrhand);
1590 	ifq_barrier(&ifp->if_snd);
1591 
1592 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1593 
1594 	ifq_clr_oactive(&ifp->if_snd);
1595 	ifp->if_timer = 0;
1596 
1597 	em_free_transmit_structures(sc);
1598 	em_free_receive_structures(sc);
1599 }
1600 
1601 /*********************************************************************
1602  *
1603  *  Determine hardware revision.
1604  *
1605  **********************************************************************/
1606 void
1607 em_identify_hardware(struct em_softc *sc)
1608 {
1609 	u_int32_t reg;
1610 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1611 
1612 	/* Save the PCI command word; it is checked later for MWI handling */
1613 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1614 					    PCI_COMMAND_STATUS_REG);
1615 
1616 	/* Save off the information about this board */
1617 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1618 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1619 
1620 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1621 	sc->hw.revision_id = PCI_REVISION(reg);
1622 
1623 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1624 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1625 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1626 
1627 	/* Identify the MAC */
1628 	if (em_set_mac_type(&sc->hw))
1629 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1630 
1631 	if (sc->hw.mac_type == em_pchlan)
1632 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1633 
1634 	if (sc->hw.mac_type == em_82541 ||
1635 	    sc->hw.mac_type == em_82541_rev_2 ||
1636 	    sc->hw.mac_type == em_82547 ||
1637 	    sc->hw.mac_type == em_82547_rev_2)
1638 		sc->hw.phy_init_script = TRUE;
1639 }
1640 
1641 void
1642 em_legacy_irq_quirk_spt(struct em_softc *sc)
1643 {
1644 	uint32_t	reg;
1645 
1646 	/* Legacy interrupt: SPT needs a quirk. */
1647 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp &&
1648 	    sc->hw.mac_type != em_pch_tgp && sc->hw.mac_type != em_pch_adp)
1649 		return;
1650 	if (sc->legacy_irq == 0)
1651 		return;
1652 
1653 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1654 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1655 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1656 
1657 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1658 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1659 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1660 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1661 }
1662 
1663 int
1664 em_allocate_pci_resources(struct em_softc *sc)
1665 {
1666 	int		val, rid;
1667 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1668 	struct em_queue	       *que = NULL;
1669 
1670 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1671 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1672 		printf(": mmba is not mem space\n");
1673 		return (ENXIO);
1674 	}
1675 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1676 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1677 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1678 		printf(": cannot find mem space\n");
1679 		return (ENXIO);
1680 	}
1681 
1682 	switch (sc->hw.mac_type) {
1683 	case em_82544:
1684 	case em_82540:
1685 	case em_82545:
1686 	case em_82546:
1687 	case em_82541:
1688 	case em_82541_rev_2:
1689 		/* Figure out where our I/O BAR is. */
1690 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1691 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1692 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1693 				sc->io_rid = rid;
1694 				break;
1695 			}
1696 			rid += 4;
1697 			if (PCI_MAPREG_MEM_TYPE(val) ==
1698 			    PCI_MAPREG_MEM_TYPE_64BIT)
1699 				rid += 4;	/* skip high bits, too */
1700 		}
1701 
1702 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1703 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1704 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1705 			printf(": cannot find i/o space\n");
1706 			return (ENXIO);
1707 		}
1708 
1709 		sc->hw.io_base = 0;
1710 		break;
1711 	default:
1712 		break;
1713 	}
1714 
1715 	sc->osdep.em_flashoffset = 0;
1716 	/* for ICH8 and family we need to find the flash memory */
1717 	if (sc->hw.mac_type >= em_pch_spt) {
1718 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1719 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1720 		sc->osdep.em_flashbase = 0;
1721 		sc->osdep.em_flashsize = 0;
1722 		sc->osdep.em_flashoffset = 0xe000;
1723 	} else if (IS_ICH8(sc->hw.mac_type)) {
1724 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1725 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1726 			printf(": flash is not mem space\n");
1727 			return (ENXIO);
1728 		}
1729 
1730 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1731 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1732 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1733 			printf(": cannot find mem space\n");
1734 			return (ENXIO);
1735 		}
1736 	}
1737 
1738 	sc->osdep.dev = (struct device *)sc;
1739 	sc->hw.back = &sc->osdep;
1740 
1741 	/* Only one queue for the moment. */
1742 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1743 	if (que == NULL) {
1744 		printf(": unable to allocate queue memory\n");
1745 		return (ENOMEM);
1746 	}
1747 	que->me = 0;
1748 	que->sc = sc;
1749 	timeout_set(&que->rx_refill, em_rxrefill, que);
1750 
1751 	sc->queues = que;
1752 	sc->num_queues = 1;
1753 	sc->msix = 0;
1754 	sc->legacy_irq = 0;
1755 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1756 		return (ENXIO);
1757 
1758 	/*
1759 	 * The ICP_xxxx device has multiple, duplicate register sets for
1760 	 * use when it is being used as a network processor. Disable those
1761 	 * registers here, as they are not necessary in this context and
1762 	 * can confuse the system.
1763 	 */
1764 	if (sc->hw.mac_type == em_icp_xxxx) {
1765 		int offset;
1766 		pcireg_t val;
1767 
1768 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1769 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1770 			return (0);
1771 		}
1772 		offset += PCI_ST_SMIA_OFFSET;
1773 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1774 		    offset, 0x06);
1775 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1776 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1777 	}
1778 	return (0);
1779 }
1780 
1781 void
1782 em_free_pci_resources(struct em_softc *sc)
1783 {
1784 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1785 	pci_chipset_tag_t	pc = pa->pa_pc;
1786 	struct em_queue	       *que = NULL;
1787 	if (sc->sc_intrhand)
1788 		pci_intr_disestablish(pc, sc->sc_intrhand);
1789 	sc->sc_intrhand = 0;
1790 
1791 	if (sc->osdep.em_flashbase)
1792 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1793 				sc->osdep.em_flashsize);
1794 	sc->osdep.em_flashbase = 0;
1795 
1796 	if (sc->osdep.em_iobase)
1797 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1798 				sc->osdep.em_iosize);
1799 	sc->osdep.em_iobase = 0;
1800 
1801 	if (sc->osdep.em_membase)
1802 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1803 				sc->osdep.em_memsize);
1804 	sc->osdep.em_membase = 0;
1805 
1806 	FOREACH_QUEUE(sc, que) {
1807 		if (que->rx.sc_rx_desc_ring != NULL) {
1808 			que->rx.sc_rx_desc_ring = NULL;
1809 			em_dma_free(sc, &que->rx.sc_rx_dma);
1810 		}
1811 		if (que->tx.sc_tx_desc_ring != NULL) {
1812 			que->tx.sc_tx_desc_ring = NULL;
1813 			em_dma_free(sc, &que->tx.sc_tx_dma);
1814 		}
1815 		if (que->tag)
1816 			pci_intr_disestablish(pc, que->tag);
1817 		que->tag = NULL;
1818 		que->eims = 0;
1819 		que->me = 0;
1820 		que->sc = NULL;
1821 	}
1822 	sc->legacy_irq = 0;
1823 	sc->msix_linkvec = 0;
1824 	sc->msix_queuesmask = 0;
1825 	if (sc->queues)
1826 		free(sc->queues, M_DEVBUF,
1827 		    sc->num_queues * sizeof(struct em_queue));
1828 	sc->num_queues = 0;
1829 	sc->queues = NULL;
1830 }
1831 
1832 /*********************************************************************
1833  *
1834  *  Initialize the hardware to a configuration as specified by the
1835  *  em_softc structure. The controller is reset, the EEPROM is
1836  *  verified, the MAC address is set, then the shared initialization
1837  *  routines are called.
1838  *
1839  **********************************************************************/
1840 int
1841 em_hardware_init(struct em_softc *sc)
1842 {
1843 	uint32_t ret_val;
1844 	u_int16_t rx_buffer_size;
1845 
1846 	INIT_DEBUGOUT("em_hardware_init: begin");
1847 	if (sc->hw.mac_type >= em_pch_spt)
1848 		em_flush_desc_rings(sc);
1849 	/* Issue a global reset */
1850 	em_reset_hw(&sc->hw);
1851 
1852 	/* When hardware is reset, fifo_head is also reset */
1853 	sc->tx_fifo_head = 0;
1854 
1855 	/* Make sure we have a good EEPROM before we read from it */
1856 	if (em_get_flash_presence_i210(&sc->hw) &&
1857 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1858 		/*
1859 		 * Some PCIe parts fail the first check due to
1860 		 * the link being in sleep state; call it again and,
1861 		 * if it fails a second time, it's a real issue.
1862 		 */
1863 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1864 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1865 			       DEVNAME(sc));
1866 			return (EIO);
1867 		}
1868 	}
1869 
1870 	if (em_get_flash_presence_i210(&sc->hw) &&
1871 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1872 		printf("%s: EEPROM read error while reading part number\n",
1873 		       DEVNAME(sc));
1874 		return (EIO);
1875 	}
1876 
1877 	/* Set up smart power down as default off on newer adapters */
1878 	if (!em_smart_pwr_down &&
1879 	     (sc->hw.mac_type == em_82571 ||
1880 	      sc->hw.mac_type == em_82572 ||
1881 	      sc->hw.mac_type == em_82575 ||
1882 	      sc->hw.mac_type == em_82576 ||
1883 	      sc->hw.mac_type == em_82580 ||
1884 	      sc->hw.mac_type == em_i210 ||
1885 	      sc->hw.mac_type == em_i350 )) {
1886 		uint16_t phy_tmp = 0;
1887 
1888 		/* Speed up time to link by disabling smart power down */
1889 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1890 		phy_tmp &= ~IGP02E1000_PM_SPD;
1891 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1892 	}
1893 
1894 	em_legacy_irq_quirk_spt(sc);
1895 
1896 	/*
1897 	 * These parameters control the automatic generation (Tx) and
1898 	 * response (Rx) to Ethernet PAUSE frames.
1899 	 * - High water mark should allow for at least two frames to be
1900 	 *   received after sending an XOFF.
1901 	 * - Low water mark works best when it is very near the high water mark.
1902 	 *   This allows the receiver to restart by sending XON when it has
1903 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1904 	 *   restart after one full frame is pulled from the buffer.  There
1905 	 *   could be several smaller frames in the buffer and if so they will
1906 	 *   not trigger the XON until their total number reduces the buffer
1907 	 *   by 1500.
1908 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1909 	 */
1910 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1911 
1912 	sc->hw.fc_high_water = rx_buffer_size -
1913 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1914 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
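	/*
	 * Worked example (illustrative numbers only, not read from any
	 * particular part): if the PBA register reports 48 KB of Rx packet
	 * buffer, rx_buffer_size is 48 << 10 = 49152 bytes.  With a standard
	 * 1518-byte max frame, EM_ROUNDUP(1518, 1024) = 2048, giving
	 * fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */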
1915 	if (sc->hw.mac_type == em_80003es2lan)
1916 		sc->hw.fc_pause_time = 0xFFFF;
1917 	else
1918 		sc->hw.fc_pause_time = 1000;
1919 	sc->hw.fc_send_xon = TRUE;
1920 	sc->hw.fc = E1000_FC_FULL;
1921 
1922 	em_disable_aspm(sc);
1923 
1924 	if ((ret_val = em_init_hw(sc)) != 0) {
1925 		if (ret_val == E1000_DEFER_INIT) {
1926 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1927 			return (EAGAIN);
1928 		}
1929 		printf("\n%s: Hardware Initialization Failed: %d\n",
1930 		       DEVNAME(sc), ret_val);
1931 		return (EIO);
1932 	}
1933 
1934 	em_check_for_link(&sc->hw);
1935 
1936 	return (0);
1937 }
1938 
1939 /*********************************************************************
1940  *
1941  *  Setup networking device structure and register an interface.
1942  *
1943  **********************************************************************/
1944 void
1945 em_setup_interface(struct em_softc *sc)
1946 {
1947 	struct ifnet   *ifp;
1948 	uint64_t fiber_type = IFM_1000_SX;
1949 
1950 	INIT_DEBUGOUT("em_setup_interface: begin");
1951 
1952 	ifp = &sc->sc_ac.ac_if;
1953 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1954 	ifp->if_softc = sc;
1955 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1956 	ifp->if_xflags = IFXF_MPSAFE;
1957 	ifp->if_ioctl = em_ioctl;
1958 	ifp->if_qstart = em_start;
1959 	ifp->if_watchdog = em_watchdog;
1960 	ifp->if_hardmtu =
1961 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1962 	ifq_set_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1963 
1964 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1965 
1966 #if NVLAN > 0
1967 	if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
1968 	    sc->hw.mac_type != em_82576 &&
1969 	    sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
1970 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1971 #endif
1972 
1973 	if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
1974 	    sc->hw.mac_type != em_82576 &&
1975 	    sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
1976 	    sc->hw.mac_type != em_i350)
1977 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1978 
1979 	/*
1980 	 * Specify the media types supported by this adapter and register
1981 	 * callbacks to update media and link information
1982 	 */
1983 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1984 		     em_media_status);
1985 	if (sc->hw.media_type == em_media_type_fiber ||
1986 	    sc->hw.media_type == em_media_type_internal_serdes) {
1987 		if (sc->hw.mac_type == em_82545)
1988 			fiber_type = IFM_1000_LX;
1989 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1990 			    0, NULL);
1991 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1992 			    0, NULL);
1993 	} else {
1994 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1995 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1996 			    0, NULL);
1997 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
1998 			    0, NULL);
1999 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2000 			    0, NULL);
2001 		if (sc->hw.phy_type != em_phy_ife) {
2002 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
2003 				    0, NULL);
2004 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2005 		}
2006 	}
2007 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2008 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
2009 
2010 	if_attach(ifp);
2011 	ether_ifattach(ifp);
2012 	em_enable_intr(sc);
2013 }
2014 
2015 int
2016 em_detach(struct device *self, int flags)
2017 {
2018 	struct em_softc *sc = (struct em_softc *)self;
2019 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2020 	struct pci_attach_args *pa = &sc->osdep.em_pa;
2021 	pci_chipset_tag_t	pc = pa->pa_pc;
2022 
2023 	if (sc->sc_intrhand)
2024 		pci_intr_disestablish(pc, sc->sc_intrhand);
2025 	sc->sc_intrhand = 0;
2026 
2027 	em_stop(sc, 1);
2028 
2029 	em_free_pci_resources(sc);
2030 
2031 	ether_ifdetach(ifp);
2032 	if_detach(ifp);
2033 
2034 	return (0);
2035 }
2036 
2037 int
2038 em_activate(struct device *self, int act)
2039 {
2040 	struct em_softc *sc = (struct em_softc *)self;
2041 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2042 	int rv = 0;
2043 
2044 	switch (act) {
2045 	case DVACT_SUSPEND:
2046 		if (ifp->if_flags & IFF_RUNNING)
2047 			em_stop(sc, 0);
2048 		/* We have no children at the moment, but we will soon. */
2049 		rv = config_activate_children(self, act);
2050 		break;
2051 	case DVACT_RESUME:
2052 		if (ifp->if_flags & IFF_UP)
2053 			em_init(sc);
2054 		break;
2055 	default:
2056 		rv = config_activate_children(self, act);
2057 		break;
2058 	}
2059 	return (rv);
2060 }
2061 
2062 /*********************************************************************
2063  *
2064  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2065  *
2066  **********************************************************************/
2067 void
2068 em_smartspeed(struct em_softc *sc)
2069 {
2070 	uint16_t phy_tmp;
2071 
2072 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2073 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2074 		return;
2075 
2076 	if (sc->smartspeed == 0) {
2077 		/* If Master/Slave config fault is asserted twice,
2078 		 * we assume back-to-back */
2079 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2080 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2081 			return;
2082 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2083 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2084 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2085 					&phy_tmp);
2086 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2087 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2088 				em_write_phy_reg(&sc->hw,
2089 						    PHY_1000T_CTRL, phy_tmp);
2090 				sc->smartspeed++;
2091 				if (sc->hw.autoneg &&
2092 				    !em_phy_setup_autoneg(&sc->hw) &&
2093 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2094 						       &phy_tmp)) {
2095 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2096 						    MII_CR_RESTART_AUTO_NEG);
2097 					em_write_phy_reg(&sc->hw,
2098 							 PHY_CTRL, phy_tmp);
2099 				}
2100 			}
2101 		}
2102 		return;
2103 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2104 		/* If still no link, perhaps using 2/3 pair cable */
2105 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2106 		phy_tmp |= CR_1000T_MS_ENABLE;
2107 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2108 		if (sc->hw.autoneg &&
2109 		    !em_phy_setup_autoneg(&sc->hw) &&
2110 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2111 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2112 				    MII_CR_RESTART_AUTO_NEG);
2113 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2114 		}
2115 	}
2116 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2117 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2118 		sc->smartspeed = 0;
2119 }
2120 
2121 /*
2122  * Manage DMA'able memory.
2123  */
2124 int
2125 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2126 {
2127 	int r;
2128 
2129 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2130 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2131 	if (r != 0)
2132 		return (r);
2133 
2134 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2135 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2136 	if (r != 0)
2137 		goto destroy;
2138 
2139 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2140 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2141 	if (r != 0)
2142 		goto free;
2143 
2144 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2145 	    NULL, BUS_DMA_WAITOK);
2146 	if (r != 0)
2147 		goto unmap;
2148 
2149 	dma->dma_size = size;
2150 	return (0);
2151 
2152 unmap:
2153 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2154 free:
2155 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2156 destroy:
2157 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2158 
2159 	return (r);
2160 }
2161 
2162 void
2163 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2164 {
2165 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2166 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2167 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2168 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2169 }
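
/*
 * A minimal usage sketch of the two helpers above (not part of the driver;
 * the 256-descriptor ring size is an arbitrary illustration).  em_dma_malloc()
 * performs the usual four-step bus_dma sequence (dmamap create, dmamem alloc,
 * dmamem map, dmamap load) and unwinds on failure, so a caller only has to
 * pair it with em_dma_free() on teardown.
 */
#if 0
	struct em_dma_alloc	 dma;
	struct em_tx_desc	*ring;

	if (em_dma_malloc(sc, 256 * sizeof(struct em_tx_desc), &dma) != 0)
		return (ENOMEM);
	ring = (struct em_tx_desc *)dma.dma_vaddr;
	/* hand dma.dma_map->dm_segs[0].ds_addr to the hardware, use the ring */
	em_dma_free(sc, &dma);
#endif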
2170 
2171 /*********************************************************************
2172  *
2173  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2174  *  the information needed to transmit a packet on the wire.
2175  *
2176  **********************************************************************/
2177 int
2178 em_allocate_transmit_structures(struct em_softc *sc)
2179 {
2180 	struct em_queue *que;
2181 
2182 	FOREACH_QUEUE(sc, que) {
2183 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2184 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2185 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2186 
2187 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2188 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2189 		if (que->tx.sc_tx_pkts_ring == NULL) {
2190 			printf("%s: Unable to allocate tx_buffer memory\n",
2191 			    DEVNAME(sc));
2192 			return (ENOMEM);
2193 		}
2194 	}
2195 
2196 	return (0);
2197 }
2198 
2199 /*********************************************************************
2200  *
2201  *  Allocate and initialize transmit structures.
2202  *
2203  **********************************************************************/
2204 int
2205 em_setup_transmit_structures(struct em_softc *sc)
2206 {
2207 	struct em_queue *que;
2208 	struct em_packet *pkt;
2209 	int error, i;
2210 
2211 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2212 		goto fail;
2213 
2214 	FOREACH_QUEUE(sc, que) {
2215 		bzero((void *) que->tx.sc_tx_desc_ring,
2216 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2217 
2218 		for (i = 0; i < sc->sc_tx_slots; i++) {
2219 			pkt = &que->tx.sc_tx_pkts_ring[i];
2220 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2221 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2222 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2223 			if (error != 0) {
2224 				printf("%s: Unable to create TX DMA map\n",
2225 				    DEVNAME(sc));
2226 				goto fail;
2227 			}
2228 		}
2229 
2230 		que->tx.sc_tx_desc_head = 0;
2231 		que->tx.sc_tx_desc_tail = 0;
2232 
2233 		/* Set checksum context */
2234 		que->tx.active_checksum_context = OFFLOAD_NONE;
2235 	}
2236 
2237 	return (0);
2238 
2239 fail:
2240 	em_free_transmit_structures(sc);
2241 	return (error);
2242 }
2243 
2244 /*********************************************************************
2245  *
2246  *  Enable transmit unit.
2247  *
2248  **********************************************************************/
2249 void
2250 em_initialize_transmit_unit(struct em_softc *sc)
2251 {
2252 	u_int32_t	reg_tctl, reg_tipg = 0;
2253 	u_int64_t	bus_addr;
2254 	struct em_queue *que;
2255 
2256 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2257 
2258 	FOREACH_QUEUE(sc, que) {
2259 		/* Setup the Base and Length of the Tx Descriptor Ring */
2260 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2261 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2262 		    sc->sc_tx_slots *
2263 		    sizeof(struct em_tx_desc));
2264 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2265 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2266 
2267 		/* Setup the HW Tx Head and Tail descriptor pointers */
2268 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2269 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2270 
2271 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2272 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2273 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2274 
2275 		/* Set the default values for the Tx Inter Packet Gap timer */
2276 		switch (sc->hw.mac_type) {
2277 		case em_82542_rev2_0:
2278 		case em_82542_rev2_1:
2279 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2280 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2281 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2282 			break;
2283 		case em_80003es2lan:
2284 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2285 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2286 			break;
2287 		default:
2288 			if (sc->hw.media_type == em_media_type_fiber ||
2289 			    sc->hw.media_type == em_media_type_internal_serdes)
2290 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2291 			else
2292 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2293 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2294 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2295 		}
2296 
2297 
2298 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2299 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2300 		if (sc->hw.mac_type >= em_82540)
2301 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2302 
2303 		/* Setup Transmit Descriptor Base Settings */
2304 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2305 
2306 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2307 		    sc->hw.mac_type == em_82576 ||
2308 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2309 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2310 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2311 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2312 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2313 		} else if (sc->tx_int_delay > 0)
2314 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2315 	}
2316 
2317 	/* Program the Transmit Control Register */
2318 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2319 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2320 	if (sc->hw.mac_type >= em_82571)
2321 		reg_tctl |= E1000_TCTL_MULR;
2322 	if (sc->link_duplex == FULL_DUPLEX)
2323 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2324 	else
2325 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2326 	/* This write will effectively turn on the transmit unit */
2327 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2328 
2329 	/* SPT Si errata workaround to avoid data corruption */
2330 
2331 	if (sc->hw.mac_type == em_pch_spt) {
2332 		uint32_t	reg_val;
2333 
2334 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2335 		reg_val |= E1000_RCTL_RDMTS_HEX;
2336 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2337 
2338 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2339 		/* i218-i219 Specification Update 1.5.4.5 */
2340 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2341 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2342 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2343 	}
2344 }
2345 
2346 /*********************************************************************
2347  *
2348  *  Free all transmit related data structures.
2349  *
2350  **********************************************************************/
2351 void
2352 em_free_transmit_structures(struct em_softc *sc)
2353 {
2354 	struct em_queue *que;
2355 	struct em_packet *pkt;
2356 	int i;
2357 
2358 	INIT_DEBUGOUT("free_transmit_structures: begin");
2359 
2360 	FOREACH_QUEUE(sc, que) {
2361 		if (que->tx.sc_tx_pkts_ring != NULL) {
2362 			for (i = 0; i < sc->sc_tx_slots; i++) {
2363 				pkt = &que->tx.sc_tx_pkts_ring[i];
2364 
2365 				if (pkt->pkt_m != NULL) {
2366 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2367 					    0, pkt->pkt_map->dm_mapsize,
2368 					    BUS_DMASYNC_POSTWRITE);
2369 					bus_dmamap_unload(sc->sc_dmat,
2370 					    pkt->pkt_map);
2371 
2372 					m_freem(pkt->pkt_m);
2373 					pkt->pkt_m = NULL;
2374 				}
2375 
2376 				if (pkt->pkt_map != NULL) {
2377 					bus_dmamap_destroy(sc->sc_dmat,
2378 					    pkt->pkt_map);
2379 					pkt->pkt_map = NULL;
2380 				}
2381 			}
2382 
2383 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2384 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2385 			que->tx.sc_tx_pkts_ring = NULL;
2386 		}
2387 
2388 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2389 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2390 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2391 	}
2392 }
2393 
2394 /*********************************************************************
2395  *
2396  *  The offload context needs to be set when we transfer the first
2397  *  packet of a particular protocol (TCP/UDP). We change the
2398  *  context only if the protocol type changes.
2399  *
2400  **********************************************************************/
2401 u_int
2402 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2403     u_int32_t *txd_upper, u_int32_t *txd_lower)
2404 {
2405 	struct em_context_desc *TXD;
2406 
2407 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2408 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2409 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2410 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2411 			return (0);
2412 		else
2413 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2414 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2415 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2416 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2417 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2418 			return (0);
2419 		else
2420 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2421 	} else {
2422 		*txd_upper = 0;
2423 		*txd_lower = 0;
2424 		return (0);
2425 	}
2426 
2427 	/* If we reach this point, the checksum offload context
2428 	 * needs to be reset.
2429 	 */
2430 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2431 
2432 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2433 	TXD->lower_setup.ip_fields.ipcso =
2434 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2435 	TXD->lower_setup.ip_fields.ipcse =
2436 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2437 
2438 	TXD->upper_setup.tcp_fields.tucss =
2439 	    ETHER_HDR_LEN + sizeof(struct ip);
2440 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2441 
2442 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2443 		TXD->upper_setup.tcp_fields.tucso =
2444 		    ETHER_HDR_LEN + sizeof(struct ip) +
2445 		    offsetof(struct tcphdr, th_sum);
2446 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2447 		TXD->upper_setup.tcp_fields.tucso =
2448 		    ETHER_HDR_LEN + sizeof(struct ip) +
2449 		    offsetof(struct udphdr, uh_sum);
2450 	}
2451 
2452 	TXD->tcp_seg_setup.data = htole32(0);
2453 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2454 
2455 	return (1);
2456 }
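
/*
 * A sketch (an assumption about the caller, not this driver's actual transmit
 * path) of how the return value above is meant to be consumed: a non-zero
 * return means one descriptor slot at 'head' was used for a context
 * descriptor and must be skipped before the data descriptors are filled in
 * with the txd_upper/txd_lower bits.  'que', 'm', 'head' and 'sc' are
 * placeholders for the caller's own state.
 */
#if 0
	u_int32_t txd_upper, txd_lower;
	u_int used;

	used = em_transmit_checksum_setup(que, m, head, &txd_upper, &txd_lower);
	head += used;
	if (head == sc->sc_tx_slots)
		head = 0;
	/* fill data descriptors starting at 'head' */
#endif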
2457 
2458 /**********************************************************************
2459  *
2460  *  Examine each tx_buffer in the used queue. If the hardware is done
2461  *  processing the packet then free associated resources. The
2462  *  tx_buffer is put back on the free queue.
2463  *
2464  **********************************************************************/
2465 void
2466 em_txeof(struct em_queue *que)
2467 {
2468 	struct em_softc *sc = que->sc;
2469 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2470 	struct em_packet *pkt;
2471 	struct em_tx_desc *desc;
2472 	u_int head, tail;
2473 	u_int free = 0;
2474 
2475 	head = que->tx.sc_tx_desc_head;
2476 	tail = que->tx.sc_tx_desc_tail;
2477 
2478 	if (head == tail)
2479 		return;
2480 
2481 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2482 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2483 	    BUS_DMASYNC_POSTREAD);
2484 
2485 	do {
2486 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2487 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2488 
2489 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2490 			break;
2491 
2492 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2493 		    0, pkt->pkt_map->dm_mapsize,
2494 		    BUS_DMASYNC_POSTWRITE);
2495 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2496 
2497 		KASSERT(pkt->pkt_m != NULL);
2498 
2499 		m_freem(pkt->pkt_m);
2500 		pkt->pkt_m = NULL;
2501 
2502 		tail = pkt->pkt_eop;
2503 
2504 		if (++tail == sc->sc_tx_slots)
2505 			tail = 0;
2506 
2507 		free++;
2508 	} while (tail != head);
2509 
2510 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2511 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2512 	    BUS_DMASYNC_PREREAD);
2513 
2514 	if (free == 0)
2515 		return;
2516 
2517 	que->tx.sc_tx_desc_tail = tail;
2518 
2519 	if (ifq_is_oactive(&ifp->if_snd))
2520 		ifq_restart(&ifp->if_snd);
2521 	else if (tail == head)
2522 		ifp->if_timer = 0;
2523 }
2524 
2525 /*********************************************************************
2526  *
2527  *  Get a buffer from the system mbuf pool.
2528  *
2529  **********************************************************************/
2530 int
2531 em_get_buf(struct em_queue *que, int i)
2532 {
2533 	struct em_softc *sc = que->sc;
2534 	struct mbuf    *m;
2535 	struct em_packet *pkt;
2536 	struct em_rx_desc *desc;
2537 	int error;
2538 
2539 	pkt = &que->rx.sc_rx_pkts_ring[i];
2540 	desc = &que->rx.sc_rx_desc_ring[i];
2541 
2542 	KASSERT(pkt->pkt_m == NULL);
2543 
2544 	m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2545 	if (m == NULL) {
2546 		sc->mbuf_cluster_failed++;
2547 		return (ENOBUFS);
2548 	}
2549 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2550 	m_adj(m, ETHER_ALIGN);
2551 
2552 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2553 	    m, BUS_DMA_NOWAIT);
2554 	if (error) {
2555 		m_freem(m);
2556 		return (error);
2557 	}
2558 
2559 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2560 	    0, pkt->pkt_map->dm_mapsize,
2561 	    BUS_DMASYNC_PREREAD);
2562 	pkt->pkt_m = m;
2563 
2564 	memset(desc, 0, sizeof(*desc));
2565 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2566 
2567 	return (0);
2568 }
2569 
2570 /*********************************************************************
2571  *
2572  *  Allocate memory for rx_buffer structures. Since we use one
2573  *  rx_buffer per received packet, the maximum number of rx_buffers
2574  *  that we'll need is equal to the number of receive descriptors
2575  *  that we've allocated.
2576  *
2577  **********************************************************************/
2578 int
2579 em_allocate_receive_structures(struct em_softc *sc)
2580 {
2581 	struct em_queue *que;
2582 	struct em_packet *pkt;
2583 	int i;
2584 	int error;
2585 
2586 	FOREACH_QUEUE(sc, que) {
2587 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2588 		    sizeof(*que->rx.sc_rx_pkts_ring),
2589 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2590 		if (que->rx.sc_rx_pkts_ring == NULL) {
2591 			printf("%s: Unable to allocate rx_buffer memory\n",
2592 			    DEVNAME(sc));
2593 			return (ENOMEM);
2594 		}
2595 
2596 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2597 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2598 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2599 
2600 		for (i = 0; i < sc->sc_rx_slots; i++) {
2601 			pkt = &que->rx.sc_rx_pkts_ring[i];
2602 
2603 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2604 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2605 			if (error != 0) {
2606 				printf("%s: em_allocate_receive_structures: "
2607 				    "bus_dmamap_create failed; error %u\n",
2608 				    DEVNAME(sc), error);
2609 				goto fail;
2610 			}
2611 
2612 			pkt->pkt_m = NULL;
2613 		}
2614 	}
2615 
2616 	return (0);
2617 
2618 fail:
2619 	em_free_receive_structures(sc);
2620 	return (error);
2621 }
2622 
2623 /*********************************************************************
2624  *
2625  *  Allocate and initialize receive structures.
2626  *
2627  **********************************************************************/
2628 int
2629 em_setup_receive_structures(struct em_softc *sc)
2630 {
2631 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2632 	struct em_queue *que;
2633 	u_int lwm;
2634 
2635 	if (em_allocate_receive_structures(sc))
2636 		return (ENOMEM);
2637 
2638 	FOREACH_QUEUE(sc, que) {
2639 		memset(que->rx.sc_rx_desc_ring, 0,
2640 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2641 
2642 		/* Setup our descriptor pointers */
2643 		que->rx.sc_rx_desc_tail = 0;
2644 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2645 
2646 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
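		/*
		 * Illustrative values (assuming 2048-byte clusters): a
		 * 1500-byte hardmtu gives max(4, 2 * (0 + 1)) = 4, while a
		 * 9000-byte jumbo hardmtu gives max(4, 2 * (4 + 1)) = 10.
		 */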
2647 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2648 
2649 		if (em_rxfill(que) == 0) {
2650 			printf("%s: unable to fill any rx descriptors\n",
2651 			    DEVNAME(sc));
2652 			return (ENOMEM);
2653 		}
2654 	}
2655 
2656 	return (0);
2657 }
2658 
2659 /*********************************************************************
2660  *
2661  *  Enable receive unit.
2662  *
2663  **********************************************************************/
2664 void
2665 em_initialize_receive_unit(struct em_softc *sc)
2666 {
2667 	struct em_queue *que;
2668 	u_int32_t	reg_rctl;
2669 	u_int32_t	reg_rxcsum;
2670 	u_int32_t	reg_srrctl;
2671 	u_int64_t	bus_addr;
2672 
2673 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2674 
2675 	/* Make sure receives are disabled while setting up the descriptor ring */
2676 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2677 
2678 	/* Set the Receive Delay Timer Register */
2679 	E1000_WRITE_REG(&sc->hw, RDTR,
2680 			sc->rx_int_delay | E1000_RDT_FPDB);
2681 
2682 	if (sc->hw.mac_type >= em_82540) {
2683 		if (sc->rx_int_delay)
2684 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2685 
2686 		/* Set the interrupt throttling rate.  Value is calculated
2687 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
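		/*
		 * e.g., assuming MAX_INTS_PER_SEC were 8000 (an illustrative
		 * value, not taken from this file), the ITR value would be
		 * 1 / (8000 * 256ns) ~= 488 units of 256ns, i.e. at most one
		 * interrupt every ~125us.
		 */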
2688 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2689 	}
2690 
2691 	/* Setup the Receive Control Register */
2692 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2693 	    E1000_RCTL_RDMTS_HALF |
2694 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2695 
2696 	if (sc->hw.tbi_compatibility_on == TRUE)
2697 		reg_rctl |= E1000_RCTL_SBP;
2698 
2699 	/*
2700 	 * The i350 has a bug where it always strips the CRC whether
2701 	 * asked to or not.  So ask for stripped CRC here and
2702 	 * cope with it in em_rxeof().
2703 	 */
2704 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2705 		reg_rctl |= E1000_RCTL_SECRC;
2706 
2707 	switch (sc->sc_rx_buffer_len) {
2708 	default:
2709 	case EM_RXBUFFER_2048:
2710 		reg_rctl |= E1000_RCTL_SZ_2048;
2711 		break;
2712 	case EM_RXBUFFER_4096:
2713 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2714 		break;
2715 	case EM_RXBUFFER_8192:
2716 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2717 		break;
2718 	case EM_RXBUFFER_16384:
2719 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2720 		break;
2721 	}
2722 
2723 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2724 		reg_rctl |= E1000_RCTL_LPE;
2725 
2726 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2727 	if (sc->hw.mac_type >= em_82543) {
2728 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2729 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2730 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2731 	}
2732 
2733 	/*
2734 	 * XXX TEMPORARY WORKAROUND: on some systems with the 82573,
2735 	 * long latencies are observed (e.g. the Lenovo X60).
2736 	 */
2737 	if (sc->hw.mac_type == em_82573)
2738 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2739 
2740 	FOREACH_QUEUE(sc, que) {
2741 		if (sc->num_queues > 1) {
2742 			/*
2743 			 * Disable Drop Enable for every queue; the default
2744 			 * has it enabled for queues > 0.
2745 			 */
2746 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2747 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2748 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2749 		}
2750 
2751 		/* Setup the Base and Length of the Rx Descriptor Ring */
2752 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2753 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2754 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2755 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2756 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2757 
2758 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2759 		    sc->hw.mac_type == em_82576 ||
2760 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2761 			/* 82575/6 need to enable the RX queue */
2762 			uint32_t reg;
2763 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2764 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2765 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2766 		}
2767 	}
2768 
2769 	/* Enable Receives */
2770 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2771 
2772 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2773 	FOREACH_QUEUE(sc, que) {
2774 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2775 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2776 	}
2777 }
2778 
2779 /*********************************************************************
2780  *
2781  *  Free receive related data structures.
2782  *
2783  **********************************************************************/
2784 void
2785 em_free_receive_structures(struct em_softc *sc)
2786 {
2787 	struct em_queue *que;
2788 	struct em_packet *pkt;
2789 	int i;
2790 
2791 	INIT_DEBUGOUT("free_receive_structures: begin");
2792 
2793 	FOREACH_QUEUE(sc, que) {
2794 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2795 
2796 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2797 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2798 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2799 
2800 		if (que->rx.sc_rx_pkts_ring != NULL) {
2801 			for (i = 0; i < sc->sc_rx_slots; i++) {
2802 				pkt = &que->rx.sc_rx_pkts_ring[i];
2803 				if (pkt->pkt_m != NULL) {
2804 					bus_dmamap_sync(sc->sc_dmat,
2805 					    pkt->pkt_map,
2806 					    0, pkt->pkt_map->dm_mapsize,
2807 					    BUS_DMASYNC_POSTREAD);
2808 					bus_dmamap_unload(sc->sc_dmat,
2809 					    pkt->pkt_map);
2810 					m_freem(pkt->pkt_m);
2811 					pkt->pkt_m = NULL;
2812 				}
2813 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2814 			}
2815 
2816 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2817 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2818 			que->rx.sc_rx_pkts_ring = NULL;
2819 		}
2820 
2821 		if (que->rx.fmp != NULL) {
2822 			m_freem(que->rx.fmp);
2823 			que->rx.fmp = NULL;
2824 			que->rx.lmp = NULL;
2825 		}
2826 	}
2827 }
2828 
2829 int
2830 em_rxfill(struct em_queue *que)
2831 {
2832 	struct em_softc *sc = que->sc;
2833 	u_int slots;
2834 	int post = 0;
2835 	int i;
2836 
2837 	i = que->rx.sc_rx_desc_head;
2838 
2839 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2840 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2841 	    BUS_DMASYNC_POSTWRITE);
2842 
2843 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2844 	    slots > 0; slots--) {
2845 		if (++i == sc->sc_rx_slots)
2846 			i = 0;
2847 
2848 		if (em_get_buf(que, i) != 0)
2849 			break;
2850 
2851 		que->rx.sc_rx_desc_head = i;
2852 		post = 1;
2853 	}
2854 
2855 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2856 
2857 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2858 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2859 	    BUS_DMASYNC_PREWRITE);
2860 
2861 	return (post);
2862 }
2863 
2864 void
2865 em_rxrefill(void *arg)
2866 {
2867 	struct em_queue *que = arg;
2868 	struct em_softc *sc = que->sc;
2869 
2870 	if (em_rxfill(que))
2871 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2872 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2873 		timeout_add(&que->rx_refill, 1);
2874 }
2875 
2876 /*********************************************************************
2877  *
2878  *  This routine executes in interrupt context. It replenishes
2879  *  the mbufs in the descriptor ring and passes data which has been
2880  *  DMA'ed into host memory to the upper layer.
2881  *
2882  *********************************************************************/
2883 int
2884 em_rxeof(struct em_queue *que)
2885 {
2886 	struct em_softc	    *sc = que->sc;
2887 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2888 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2889 	struct mbuf	    *m;
2890 	u_int8_t	    accept_frame = 0;
2891 	u_int8_t	    eop = 0;
2892 	u_int16_t	    len, desc_len, prev_len_adj;
2893 	int		    i, rv = 0;
2894 
2895 	/* Pointer to the receive descriptor being examined. */
2896 	struct em_rx_desc   *desc;
2897 	struct em_packet    *pkt;
2898 	u_int8_t	    status;
2899 
2900 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2901 		return (0);
2902 
2903 	i = que->rx.sc_rx_desc_tail;
2904 
2905 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2906 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2907 	    BUS_DMASYNC_POSTREAD);
2908 
2909 	do {
2910 		m = NULL;
2911 
2912 		pkt = &que->rx.sc_rx_pkts_ring[i];
2913 		desc = &que->rx.sc_rx_desc_ring[i];
2914 
2915 		status = desc->status;
2916 		if (!ISSET(status, E1000_RXD_STAT_DD))
2917 			break;
2918 
2919 		/* pull the mbuf off the ring */
2920 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2921 		    0, pkt->pkt_map->dm_mapsize,
2922 		    BUS_DMASYNC_POSTREAD);
2923 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2924 		m = pkt->pkt_m;
2925 		pkt->pkt_m = NULL;
2926 
2927 		KASSERT(m != NULL);
2928 
2929 		if_rxr_put(&que->rx.sc_rx_ring, 1);
2930 		rv = 1;
2931 
2932 		accept_frame = 1;
2933 		prev_len_adj = 0;
2934 		desc_len = letoh16(desc->length);
2935 
2936 		if (status & E1000_RXD_STAT_EOP) {
2937 			eop = 1;
2938 			if (desc_len < ETHER_CRC_LEN) {
2939 				len = 0;
2940 				prev_len_adj = ETHER_CRC_LEN - desc_len;
2941 			} else if (sc->hw.mac_type == em_i210 ||
2942 			    sc->hw.mac_type == em_i350)
2943 				len = desc_len;
2944 			else
2945 				len = desc_len - ETHER_CRC_LEN;
2946 		} else {
2947 			eop = 0;
2948 			len = desc_len;
2949 		}
2950 
2951 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
2952 			u_int8_t last_byte;
2953 			u_int32_t pkt_len = desc_len;
2954 
2955 			if (que->rx.fmp != NULL)
2956 				pkt_len += que->rx.fmp->m_pkthdr.len;
2957 
2958 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
2959 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
2960 			    pkt_len, last_byte)) {
2961 #if NKSTAT > 0
2962 				em_tbi_adjust_stats(sc,
2963 				    pkt_len, sc->hw.mac_addr);
2964 #endif
2965 				if (len > 0)
2966 					len--;
2967 			} else
2968 				accept_frame = 0;
2969 		}
2970 
2971 		if (accept_frame) {
2972 			/* Assign correct length to the current fragment */
2973 			m->m_len = len;
2974 
2975 			if (que->rx.fmp == NULL) {
2976 				m->m_pkthdr.len = m->m_len;
2977 				que->rx.fmp = m;	 /* Store the first mbuf */
2978 				que->rx.lmp = m;
2979 			} else {
2980 				/* Chain mbuf's together */
2981 				m->m_flags &= ~M_PKTHDR;
2982 				/*
2983 				 * Adjust length of previous mbuf in chain if
2984 				 * we received less than 4 bytes in the last
2985 				 * descriptor.
2986 				 */
2987 				if (prev_len_adj > 0) {
2988 					que->rx.lmp->m_len -= prev_len_adj;
2989 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
2990 				}
2991 				que->rx.lmp->m_next = m;
2992 				que->rx.lmp = m;
2993 				que->rx.fmp->m_pkthdr.len += m->m_len;
2994 			}
2995 
2996 			if (eop) {
2997 				m = que->rx.fmp;
2998 
2999 				em_receive_checksum(sc, desc, m);
3000 #if NVLAN > 0
3001 				if (desc->status & E1000_RXD_STAT_VP) {
3002 					m->m_pkthdr.ether_vtag =
3003 					    letoh16(desc->special);
3004 					m->m_flags |= M_VLANTAG;
3005 				}
3006 #endif
3007 				ml_enqueue(&ml, m);
3008 
3009 				que->rx.fmp = NULL;
3010 				que->rx.lmp = NULL;
3011 			}
3012 		} else {
3013 			que->rx.dropped_pkts++;
3014 
3015 			if (que->rx.fmp != NULL) {
3016 				m_freem(que->rx.fmp);
3017 				que->rx.fmp = NULL;
3018 				que->rx.lmp = NULL;
3019 			}
3020 
3021 			m_freem(m);
3022 		}
3023 
3024 		/* Advance our pointers to the next descriptor. */
3025 		if (++i == sc->sc_rx_slots)
3026 			i = 0;
3027 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3028 
3029 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3030 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3031 	    BUS_DMASYNC_PREREAD);
3032 
3033 	que->rx.sc_rx_desc_tail = i;
3034 
3035 	if (ifiq_input(&ifp->if_rcv, &ml))
3036 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3037 
3038 	return (rv);
3039 }
3040 
3041 /*********************************************************************
3042  *
3043  *  Verify that the hardware indicated that the checksum is valid.
3044  *  Inform the stack about the status of the checksum so that the
3045  *  stack doesn't spend time verifying it.
3046  *
3047  *********************************************************************/
3048 void
3049 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3050     struct mbuf *mp)
3051 {
3052 	/* 82543 or newer only */
3053 	if ((sc->hw.mac_type < em_82543) ||
3054 	    /* Ignore Checksum bit is set */
3055 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3056 		mp->m_pkthdr.csum_flags = 0;
3057 		return;
3058 	}
3059 
3060 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3061 		/* Did it pass? */
3062 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3063 			/* IP Checksum Good */
3064 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3065 
3066 		} else
3067 			mp->m_pkthdr.csum_flags = 0;
3068 	}
3069 
3070 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3071 		/* Did it pass? */
3072 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3073 			mp->m_pkthdr.csum_flags |=
3074 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3075 	}
3076 }
3077 
3078 /*
3079  * This turns on the hardware offload of the VLAN
3080  * tag insertion and stripping.
3081  */
3082 void
3083 em_enable_hw_vlans(struct em_softc *sc)
3084 {
3085 	uint32_t ctrl;
3086 
3087 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3088 	ctrl |= E1000_CTRL_VME;
3089 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3090 }
3091 
3092 void
3093 em_enable_intr(struct em_softc *sc)
3094 {
3095 	uint32_t mask;
3096 
3097 	if (sc->msix) {
3098 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3099 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3100 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3101 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3102 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3103 	} else
3104 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3105 }
3106 
3107 void
3108 em_disable_intr(struct em_softc *sc)
3109 {
3110 	/*
3111 	 * The first version of the 82542 had an erratum where, when link
3112 	 * was forced, it would stay up even if the cable was disconnected.
3113 	 * Sequence errors were used to detect the disconnect and then
3114 	 * the driver would unforce the link.  This code is in the ISR.
3115 	 * For this to work correctly the Sequence error interrupt had
3116 	 * to be enabled all the time.
3117 	 */
3118 	if (sc->msix) {
3119 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3120 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3121 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3122 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3123 	else
3124 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3125 }
3126 
3127 void
3128 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3129 {
3130 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3131 	pcireg_t val;
3132 
3133 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3134 	if (reg & 0x2) {
3135 		val &= 0x0000ffff;
3136 		val |= (*value << 16);
3137 	} else {
3138 		val &= 0xffff0000;
3139 		val |= *value;
3140 	}
3141 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3142 }
3143 
3144 void
3145 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3146 {
3147 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3148 	pcireg_t val;
3149 
3150 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3151 	if (reg & 0x2)
3152 		*value = (val >> 16) & 0xffff;
3153 	else
3154 		*value = val & 0xffff;
3155 }
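
/*
 * Example (illustrative offset only): a 16-bit read at config offset 0x12
 * fetches the 32-bit dword at 0x10 and, because bit 1 of the offset is set,
 * returns its upper 16 bits; a 16-bit write at the same offset merges the
 * new value into that upper half before writing the dword back.
 */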
3156 
3157 void
3158 em_pci_set_mwi(struct em_hw *hw)
3159 {
3160 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3161 
3162 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3163 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3164 }
3165 
3166 void
3167 em_pci_clear_mwi(struct em_hw *hw)
3168 {
3169 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3170 
3171 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3172 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3173 }
3174 
3175 /*
3176  * We may eventually really do this, but it's unnecessary
3177  * for now, so we just return unsupported.
3178  */
3179 int32_t
3180 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3181 {
3182 	return -E1000_NOT_IMPLEMENTED;
3183 }
3184 
3185 /*********************************************************************
3186 * 82544 Coexistence issue workaround.
3187 *    There are two issues.
3188 *       1. Transmit hang issue.
3189 *    To detect this issue, the following equation can be used:
3190 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3191 *          If SUM[3:0] is between 1 and 4, we will have this issue.
3192 *
3193 *       2. DAC issue.
3194 *    To detect this issue, the following equation can be used:
3195 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3196 *          If SUM[3:0] is between 9 and c, we will have this issue.
3197 *
3198 *
3199 *    WORKAROUND:
3200 *          Make sure the ending nibble SUM[3:0] is not 1, 2, 3, 4 (hang)
3201 *          or 9, a, b, c (DAC).
3202 **********************************************************************/
3203 u_int32_t
3204 em_fill_descriptors(u_int64_t address, u_int32_t length,
3205     PDESC_ARRAY desc_array)
3206 {
3207         /* Since the issue is sensitive to both length and address, */
3208         /* let us first check the address... */
3209         u_int32_t safe_terminator;
3210         if (length <= 4) {
3211                 desc_array->descriptor[0].address = address;
3212                 desc_array->descriptor[0].length = length;
3213                 desc_array->elements = 1;
3214                 return desc_array->elements;
3215         }
3216         safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3217         /* If it does not fall in 0x1-0x4 or 0x9-0xC, a single descriptor is safe. */
3218         if (safe_terminator == 0 ||
3219             (safe_terminator > 4 && safe_terminator < 9) ||
3220             (safe_terminator > 0xC && safe_terminator <= 0xF)) {
3223                 desc_array->descriptor[0].address = address;
3224                 desc_array->descriptor[0].length = length;
3225                 desc_array->elements = 1;
3226                 return desc_array->elements;
3227         }
3228 
3229         desc_array->descriptor[0].address = address;
3230         desc_array->descriptor[0].length = length - 4;
3231         desc_array->descriptor[1].address = address + (length - 4);
3232         desc_array->descriptor[1].length = 4;
3233         desc_array->elements = 2;
3234         return desc_array->elements;
3235 }
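
/*
 * Worked example (illustrative numbers only): a segment at a bus address
 * ending in 0x2 (ADDR[2:0] = 2) with length 0x42 (SIZE[3:0] = 2) gives
 * SUM[3:0] = 4, which falls in the transmit hang range, so the function
 * above splits it into a 0x3e-byte descriptor followed by a trailing
 * 4-byte descriptor.  Below is a sketch of how a transmit path might
 * consume the result (an assumption, not this driver's actual encap code;
 * 'seg_addr' and 'seg_len' are placeholders for one DMA segment).
 */
#if 0
	DESC_ARRAY	desc_array;
	u_int32_t	n, j;

	n = em_fill_descriptors(seg_addr, seg_len, &desc_array);
	for (j = 0; j < n; j++) {
		/* queue desc_array.descriptor[j].address / .length */
	}
#endif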
3236 
3237 /*
3238  * Disable the L0S and L1 LINK states.
3239  */
3240 void
3241 em_disable_aspm(struct em_softc *sc)
3242 {
3243 	int offset;
3244 	pcireg_t val;
3245 
3246 	switch (sc->hw.mac_type) {
3247 		case em_82571:
3248 		case em_82572:
3249 		case em_82573:
3250 		case em_82574:
3251 			break;
3252 		default:
3253 			return;
3254 	}
3255 
3256 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3257 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3258 		return;
3259 
3260 	/* Disable PCIe Active State Power Management (ASPM). */
3261 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3262 	    offset + PCI_PCIE_LCSR);
3263 
3264 	switch (sc->hw.mac_type) {
3265 		case em_82571:
3266 		case em_82572:
3267 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3268 			break;
3269 		case em_82573:
3270 		case em_82574:
3271 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3272 			    PCI_PCIE_LCSR_ASPM_L1);
3273 			break;
3274 		default:
3275 			break;
3276 	}
3277 
3278 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3279 	    offset + PCI_PCIE_LCSR, val);
3280 }
3281 
3282 /*
3283  * em_flush_tx_ring - remove all descriptors from the tx_ring
3284  *
3285  * We want to clear all pending descriptors from the TX ring.
3286  * Zeroing happens when the HW reads the regs. We assign the ring itself as
3287  * the data of the next descriptor. We don't care about the data; we are
3288  * about to reset the HW anyway.
3289  */
3290 void
3291 em_flush_tx_ring(struct em_queue *que)
3292 {
3293 	struct em_softc		*sc = que->sc;
3294 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3295 	uint16_t		 size = 512;
3296 	struct em_tx_desc	*txd;
3297 
3298 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3299 
3300 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3301 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3302 
3303 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3304 
3305 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3306 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3307 	txd->lower.data = htole32(txd_lower | size);
3308 	txd->upper.data = 0;
3309 
3310 	/* flush descriptors to memory before notifying the HW */
3311 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3312 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3313 
3314 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3315 		que->tx.sc_tx_desc_head = 0;
3316 
3317 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3318 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3319 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3320 	usec_delay(250);
3321 }
3322 
3323 /*
3324  * em_flush_rx_ring - remove all descriptors from the rx_ring
3325  *
3326  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3327  */
3328 void
3329 em_flush_rx_ring(struct em_queue *que)
3330 {
3331 	uint32_t	rctl, rxdctl;
3332 	struct em_softc	*sc = que->sc;
3333 
3334 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3335 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3336 	E1000_WRITE_FLUSH(&sc->hw);
3337 	usec_delay(150);
3338 
3339 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3340 	/* zero the lower 14 bits (prefetch and host thresholds) */
3341 	rxdctl &= 0xffffc000;
3342 	/*
3343 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3344 	 * and make sure the granularity is "descriptors" and not "cache lines"
3345 	 */
3346 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3347 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3348 
3349 	/* momentarily enable the RX ring for the changes to take effect */
3350 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3351 	E1000_WRITE_FLUSH(&sc->hw);
3352 	usec_delay(150);
3353 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3354 }
3355 
3356 /*
3357  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3358  *
3359  * In i219, the descriptor rings must be emptied before resetting the HW
3360  * or before changing the device state to D3 during runtime (runtime PM).
3361  *
3362  * Failure to do this will cause the HW to enter a unit hang state which can
3363  * only be released by a PCI reset of the device.
3364  *
3365  */
3366 void
3367 em_flush_desc_rings(struct em_softc *sc)
3368 {
3369 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3370 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3371 	uint32_t		 fextnvm11, tdlen;
3372 	uint16_t		 hang_state;
3373 
3374 	/* First, disable MULR fix in FEXTNVM11 */
3375 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3376 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3377 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3378 
3379 	/* do nothing if we're not in faulty state, or if the queue is empty */
3380 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3381 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3382 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3383 		return;
3384 	em_flush_tx_ring(que);
3385 
3386 	/* recheck, maybe the fault is caused by the rx ring */
3387 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3388 	if (hang_state & FLUSH_DESC_REQUIRED)
3389 		em_flush_rx_ring(que);
3390 }
3391 
3392 int
3393 em_allocate_legacy(struct em_softc *sc)
3394 {
3395 	pci_intr_handle_t	 ih;
3396 	const char		*intrstr = NULL;
3397 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3398 	pci_chipset_tag_t	 pc = pa->pa_pc;
3399 
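	/* Prefer MSI; fall back to a legacy INTx interrupt if MSI is unavailable. */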
3400 	if (pci_intr_map_msi(pa, &ih)) {
3401 		if (pci_intr_map(pa, &ih)) {
3402 			printf(": couldn't map interrupt\n");
3403 			return (ENXIO);
3404 		}
3405 		sc->legacy_irq = 1;
3406 	}
3407 
3408 	intrstr = pci_intr_string(pc, ih);
3409 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3410 	    em_intr, sc, DEVNAME(sc));
3411 	if (sc->sc_intrhand == NULL) {
3412 		printf(": couldn't establish interrupt");
3413 		if (intrstr != NULL)
3414 			printf(" at %s", intrstr);
3415 		printf("\n");
3416 		return (ENXIO);
3417 	}
3418 	printf(": %s", intrstr);
3419 
3420 	return (0);
3421 }
3422 
3423 #if NKSTAT > 0
3424 /* this is used to look up the array of kstats quickly */
3425 enum em_stat {
3426 	em_stat_crcerrs,
3427 	em_stat_algnerrc,
3428 	em_stat_symerrs,
3429 	em_stat_rxerrc,
3430 	em_stat_mpc,
3431 	em_stat_scc,
3432 	em_stat_ecol,
3433 	em_stat_mcc,
3434 	em_stat_latecol,
3435 	em_stat_colc,
3436 	em_stat_dc,
3437 	em_stat_tncrs,
3438 	em_stat_sec,
3439 	em_stat_cexterr,
3440 	em_stat_rlec,
3441 	em_stat_xonrxc,
3442 	em_stat_xontxc,
3443 	em_stat_xoffrxc,
3444 	em_stat_xofftxc,
3445 	em_stat_fcruc,
3446 	em_stat_prc64,
3447 	em_stat_prc127,
3448 	em_stat_prc255,
3449 	em_stat_prc511,
3450 	em_stat_prc1023,
3451 	em_stat_prc1522,
3452 	em_stat_gprc,
3453 	em_stat_bprc,
3454 	em_stat_mprc,
3455 	em_stat_gptc,
3456 	em_stat_gorc,
3457 	em_stat_gotc,
3458 	em_stat_rnbc,
3459 	em_stat_ruc,
3460 	em_stat_rfc,
3461 	em_stat_roc,
3462 	em_stat_rjc,
3463 	em_stat_mgtprc,
3464 	em_stat_mgtpdc,
3465 	em_stat_mgtptc,
3466 	em_stat_tor,
3467 	em_stat_tot,
3468 	em_stat_tpr,
3469 	em_stat_tpt,
3470 	em_stat_ptc64,
3471 	em_stat_ptc127,
3472 	em_stat_ptc255,
3473 	em_stat_ptc511,
3474 	em_stat_ptc1023,
3475 	em_stat_ptc1522,
3476 	em_stat_mptc,
3477 	em_stat_bptc,
3478 #if 0
3479 	em_stat_tsctc,
3480 	em_stat_tsctf,
3481 #endif
3482 
3483 	em_stat_count,
3484 };
3485 
3486 struct em_counter {
3487 	const char		*name;
3488 	enum kstat_kv_unit	 unit;
3489 	uint32_t		 reg;
3490 };
3491 
3492 static const struct em_counter em_counters[em_stat_count] = {
3493 	[em_stat_crcerrs] =
3494 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3495 	[em_stat_algnerrc] = /* >= em_82543 */
3496 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3497 	[em_stat_symerrs] = /* >= em_82543 */
3498 	    { "rx symbol errs",	KSTAT_KV_U_PACKETS,	0 },
3499 	[em_stat_rxerrc] =
3500 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3501 	[em_stat_mpc] =
3502 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3503 	[em_stat_scc] =
3504 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3505 	[em_stat_ecol] =
3506 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3507 	[em_stat_mcc] =
3508 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3509 	[em_stat_latecol] =
3510 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3511 	[em_stat_colc] =
3512 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3513 	[em_stat_dc] =
3514 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3515 	[em_stat_tncrs] = /* >= em_82543 */
3516 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3517 	[em_stat_sec] =
3518 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3519 	[em_stat_cexterr] = /* >= em_82543 */
3520 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3521 	[em_stat_rlec] =
3522 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3523 	[em_stat_xonrxc] =
3524 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3525 	[em_stat_xontxc] =
3526 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3527 	[em_stat_xoffrxc] =
3528 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3529 	[em_stat_xofftxc] =
3530 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3531 	[em_stat_fcruc] =
3532 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3533 	[em_stat_prc64] =
3534 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3535 	[em_stat_prc127] =
3536 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3537 	[em_stat_prc255] =
3538 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3539 	[em_stat_prc511] =
3540 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3541 	[em_stat_prc1023] =
3542 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3543 	[em_stat_prc1522] =
3544 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3545 	[em_stat_gprc] =
3546 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3547 	[em_stat_bprc] =
3548 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3549 	[em_stat_mprc] =
3550 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3551 	[em_stat_gptc] =
3552 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3553 	[em_stat_gorc] = /* 64bit */
3554 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3555 	[em_stat_gotc] = /* 64bit */
3556 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3557 	[em_stat_rnbc] =
3558 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3559 	[em_stat_ruc] =
3560 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3561 	[em_stat_rfc] =
3562 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3563 	[em_stat_roc] =
3564 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3565 	[em_stat_rjc] =
3566 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3567 	[em_stat_mgtprc] =
3568 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3569 	[em_stat_mgtpdc] =
3570 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3571 	[em_stat_mgtptc] =
3572 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3573 	[em_stat_tor] = /* 64bit */
3574 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3575 	[em_stat_tot] = /* 64bit */
3576 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3577 	[em_stat_tpr] =
3578 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3579 	[em_stat_tpt] =
3580 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3581 	[em_stat_ptc64] =
3582 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3583 	[em_stat_ptc127] =
3584 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3585 	[em_stat_ptc255] =
3586 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3587 	[em_stat_ptc511] =
3588 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3589 	[em_stat_ptc1023] =
3590 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3591 	[em_stat_ptc1522] =
3592 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3593 	[em_stat_mptc] =
3594 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3595 	[em_stat_bptc] =
3596 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3597 };
3598 
3599 /**********************************************************************
3600  *
3601  *  Update the board statistics counters.
3602  *
3603  **********************************************************************/
3604 int
3605 em_kstat_read(struct kstat *ks)
3606 {
3607 	struct em_softc *sc = ks->ks_softc;
3608 	struct em_hw *hw = &sc->hw;
3609 	struct kstat_kv *kvs = ks->ks_data;
3610 	uint32_t lo, hi;
3611 	unsigned int i;
3612 
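	/*
	 * The statistics registers are clear-on-read, so accumulate each
	 * read into the running kstat value.
	 */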
3613 	for (i = 0; i < nitems(em_counters); i++) {
3614 		const struct em_counter *c = &em_counters[i];
3615 		if (c->reg == 0)
3616 			continue;
3617 
3618 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3619 		    E1000_REG_TR(hw, c->reg)); /* wtf */
3620 	}
3621 
3622 	/* Handle the exceptions. */
3623 
3624 	if (sc->hw.mac_type >= em_82543) {
3625 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3626 		    E1000_READ_REG(hw, ALGNERRC);
3627 		kstat_kv_u64(&kvs[em_stat_symerrs]) +=
3628 		    E1000_READ_REG(hw, SYMERRS);
3629 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3630 		    E1000_READ_REG(hw, CEXTERR);
3631 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3632 		    E1000_READ_REG(hw, TNCRS);
3633 #if 0
3634 		sc->stats.tsctc +=
3635 		E1000_READ_REG(hw, TSCTC);
3636 		sc->stats.tsctfc +=
3637 		E1000_READ_REG(hw, TSCTFC);
3638 #endif
3639 	}
3640 
3641 	/* For the 64-bit byte counters the low dword must be read first. */
3642 	/* Both registers clear on the read of the high dword */
3643 
3644 	lo = E1000_READ_REG(hw, GORCL);
3645 	hi = E1000_READ_REG(hw, GORCH);
3646 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3647 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3648 
3649 	lo = E1000_READ_REG(hw, GOTCL);
3650 	hi = E1000_READ_REG(hw, GOTCH);
3651 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3652 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3653 
3654 	lo = E1000_READ_REG(hw, TORL);
3655 	hi = E1000_READ_REG(hw, TORH);
3656 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3657 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3658 
3659 	lo = E1000_READ_REG(hw, TOTL);
3660 	hi = E1000_READ_REG(hw, TOTH);
3661 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3662 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3663 
3664 	getnanouptime(&ks->ks_updated);
3665 
3666 	return (0);
3667 }
3668 
3669 void
3670 em_kstat_attach(struct em_softc *sc)
3671 {
3672 	struct kstat *ks;
3673 	struct kstat_kv *kvs;
3674 	unsigned int i;
3675 
3676 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3677 
3678 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3679 	    KSTAT_T_KV, 0);
3680 	if (ks == NULL)
3681 		return;
3682 
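	/* one counter kv per em_counters entry, in the same order as the enum */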
3683 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3684 	    M_DEVBUF, M_WAITOK|M_ZERO);
3685 	for (i = 0; i < nitems(em_counters); i++) {
3686 		const struct em_counter *c = &em_counters[i];
3687 		kstat_kv_unit_init(&kvs[i], c->name,
3688 		    KSTAT_KV_T_COUNTER64, c->unit);
3689 	}
3690 
3691 	ks->ks_softc = sc;
3692 	ks->ks_data = kvs;
3693 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3694 	ks->ks_read = em_kstat_read;
3695 	kstat_set_mutex(ks, &sc->kstat_mtx);
3696 
3697 	kstat_install(ks);
3698 }
3699 
3700 /******************************************************************************
3701  * Adjusts the statistics counters when a frame is accepted by TBI_ACCEPT
3702  *****************************************************************************/
3703 void
3704 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3705 {
3706 	struct em_hw *hw = &sc->hw;
3707 	struct kstat *ks = sc->kstat;
3708 	struct kstat_kv *kvs;
3709 
3710 	if (ks == NULL)
3711 		return;
3712 
3713 	/* First adjust the frame length. */
3714 	frame_len--;
3715 
3716 	mtx_enter(&sc->kstat_mtx);
3717 	kvs = ks->ks_data;
3718 
3719 	/*
3720 	 * We need to adjust the statistics counters, since the hardware
3721 	 * counters overcount this packet as a CRC error and undercount the
3722 	 * packet as a good packet
3723 	 */
3724 
3725 	/* This packet should not be counted as a CRC error.	*/
3726 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3727 	/* This packet does count as a Good Packet Received.	*/
3728 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3729 
3730 	/* Adjust the Good Octets received counters		*/
3731 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3732 
3733 	/*
3734 	 * Is this a broadcast or multicast?  Check broadcast first, since
3735 	 * the test for a multicast frame will test positive on a broadcast
3736 	 * frame.
3737 	 */
3738 	if (ETHER_IS_BROADCAST(mac_addr)) {
3739 		/* Broadcast packet */
3740 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3741 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3742 		/* Multicast packet */
3743 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3744 	}
3745 
3746 	if (frame_len == hw->max_frame_size) {
3747 		/*
3748 		 * In this case, the hardware has overcounted the number of
3749 		 * oversize frames.
3750 		 */
3751 		kstat_kv_u64(&kvs[em_stat_roc])--;
3752 	}
3753 
3754 	/*
3755 	 * Adjust the bin counters when the extra byte put the frame in the
3756 	 * wrong bin. Remember that the frame_len was adjusted above.
3757 	 */
3758 	if (frame_len == 64) {
3759 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3760 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3761 	} else if (frame_len == 127) {
3762 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3763 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3764 	} else if (frame_len == 255) {
3765 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3766 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3767 	} else if (frame_len == 511) {
3768 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3769 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3770 	} else if (frame_len == 1023) {
3771 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3772 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3773 	} else if (frame_len == 1522) {
3774 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3775 	}
3776 
3777 	mtx_leave(&sc->kstat_mtx);
3778 }
3779 #endif /* NKSTAT > 0 */
3780 
3781 #ifndef SMALL_KERNEL
3782 int
3783 em_allocate_msix(struct em_softc *sc)
3784 {
3785 	pci_intr_handle_t	 ih;
3786 	const char		*intrstr = NULL;
3787 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3788 	pci_chipset_tag_t	 pc = pa->pa_pc;
3789 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3790 	int			 vec;
3791 
3792 	if (!em_enable_msix)
3793 		return (ENODEV);
3794 
3795 	switch (sc->hw.mac_type) {
3796 	case em_82576:
3797 	case em_82580:
3798 	case em_i350:
3799 	case em_i210:
3800 		break;
3801 	default:
3802 		return (ENODEV);
3803 	}
3804 
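	/* Map MSI-X vector 0 for the (single) queue interrupt. */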
3805 	vec = 0;
3806 	if (pci_intr_map_msix(pa, vec, &ih))
3807 		return (ENODEV);
3808 	sc->msix = 1;
3809 
3810 	que->me = vec;
3811 	que->eims = 1 << vec;
3812 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3813 
3814 	intrstr = pci_intr_string(pc, ih);
3815 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3816 	    em_queue_intr_msix, que, que->name);
3817 	if (que->tag == NULL) {
3818 		printf(": couldn't establish interrupt");
3819 		if (intrstr != NULL)
3820 			printf(" at %s", intrstr);
3821 		printf("\n");
3822 		return (ENXIO);
3823 	}
3824 
3825 	/* Set up the link vector; use the last queue vector + 1 */
3826 	vec++;
3827 	sc->msix_linkvec = vec;
3828 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3829 		printf(": couldn't map link vector\n");
3830 		return (ENXIO);
3831 	}
3832 
3833 	intrstr = pci_intr_string(pc, ih);
3834 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3835 	    em_link_intr_msix, sc, DEVNAME(sc));
3836 	if (sc->sc_intrhand == NULL) {
3837 		printf(": couldn't establish interrupt");
3838 		if (intrstr != NULL)
3839 			printf(" at %s", intrstr);
3840 		printf("\n");
3841 		return (ENXIO);
3842 	}
3843 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3844 
3845 	return (0);
3846 }
3847 
3848 /*
3849  * Interrupt for a specific queue (not link interrupts). The EICR bit which
3850  * maps to the queue's EIMS bit covers both RX and TX, therefore we can't
3851  * distinguish an RX completion from a TX completion and must handle both.
3852  * The bits in EICR are autocleared and we _cannot_ read EICR.
3853  */
3854 int
3855 em_queue_intr_msix(void *vque)
3856 {
3857 	struct em_queue *que = vque;
3858 	struct em_softc *sc = que->sc;
3859 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3860 
3861 	if (ifp->if_flags & IFF_RUNNING) {
3862 		em_txeof(que);
3863 		if (em_rxeof(que))
3864 			em_rxrefill(que);
3865 	}
3866 
3867 	em_enable_queue_intr_msix(que);
3868 
3869 	return (1);
3870 }
3871 
3872 int
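/*
 * Link (misc) interrupt vector for MSI-X mode; handles link status changes
 * and re-arms the link interrupt.
 */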
3873 em_link_intr_msix(void *arg)
3874 {
3875 	struct em_softc *sc = arg;
3876 	uint32_t icr;
3877 
3878 	icr = E1000_READ_REG(&sc->hw, ICR);
3879 
3880 	/* Link status change */
3881 	if (icr & E1000_ICR_LSC) {
3882 		KERNEL_LOCK();
3883 		sc->hw.get_link_status = 1;
3884 		em_check_for_link(&sc->hw);
3885 		em_update_link_status(sc);
3886 		KERNEL_UNLOCK();
3887 	}
3888 
3889 	/* Re-arm unconditionally */
3890 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3891 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3892 
3893 	return (1);
3894 }
3895 
3896 /*
3897  * Maps queues into MSI-X interrupt vectors.
3898  */
3899 int
3900 em_setup_queues_msix(struct em_softc *sc)
3901 {
3902 	uint32_t ivar, newitr, index;
3903 	struct em_queue *que;
3904 
3905 	KASSERT(sc->msix);
3906 
3907 	/* First turn on RSS capability */
3908 	/* First enable MSI-X mode in GPIE (skipped on the 82575) */
3909 		E1000_WRITE_REG(&sc->hw, GPIE,
3910 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3911 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3912 
3913 	/* Turn on MSIX */
3914 	switch (sc->hw.mac_type) {
3915 	case em_82580:
3916 	case em_i350:
3917 	case em_i210:
3918 		/* RX entries */
3919 		/*
3920 		 * Note: this maps queues into MSI-X vectors.  The funky
3921 		 * offset calculation and the odd/even check on que->me
3922 		 * follow the IVAR register layout; the datasheet explains
3923 		 * it well.
3924 		 */
3925 		FOREACH_QUEUE(sc, que) {
3926 			index = que->me >> 1;
3927 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3928 			if (que->me & 1) {
3929 				ivar &= 0xFF00FFFF;
3930 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3931 			} else {
3932 				ivar &= 0xFFFFFF00;
3933 				ivar |= que->me | E1000_IVAR_VALID;
3934 			}
3935 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3936 		}
3937 
3938 		/* TX entries */
3939 		FOREACH_QUEUE(sc, que) {
3940 			index = que->me >> 1;
3941 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3942 			if (que->me & 1) {
3943 				ivar &= 0x00FFFFFF;
3944 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3945 			} else {
3946 				ivar &= 0xFFFF00FF;
3947 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3948 			}
3949 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3950 			sc->msix_queuesmask |= que->eims;
3951 		}
3952 
3953 		/* And for the link interrupt */
3954 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3955 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3956 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3957 		break;
3958 	case em_82576:
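		/*
		 * On the 82576 each IVAR register holds entries for two
		 * queues: queues 0-7 use the low half of the register,
		 * queues 8-15 the high half.
		 */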
3959 		/* RX entries */
3960 		FOREACH_QUEUE(sc, que) {
3961 			index = que->me & 0x7; /* Each IVAR has two entries */
3962 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3963 			if (que->me < 8) {
3964 				ivar &= 0xFFFFFF00;
3965 				ivar |= que->me | E1000_IVAR_VALID;
3966 			} else {
3967 				ivar &= 0xFF00FFFF;
3968 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
3969 			}
3970 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3971 			sc->msix_queuesmask |= que->eims;
3972 		}
3973 		/* TX entries */
3974 		FOREACH_QUEUE(sc, que) {
3975 			index = que->me & 0x7; /* Each IVAR has two entries */
3976 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
3977 			if (que->me < 8) {
3978 				ivar &= 0xFFFF00FF;
3979 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
3980 			} else {
3981 				ivar &= 0x00FFFFFF;
3982 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
3983 			}
3984 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
3985 			sc->msix_queuesmask |= que->eims;
3986 		}
3987 
3988 		/* And for the link interrupt */
3989 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
3990 		sc->msix_linkmask = 1 << sc->msix_linkvec;
3991 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
3992 		break;
3993 	default:
3994 		panic("unsupported mac");
3995 		break;
3996 	}
3997 
3998 	/* Set the starting interrupt rate */
3999 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
4000 
4001 	if (sc->hw.mac_type == em_82575)
4002 		newitr |= newitr << 16;
4003 	else
4004 		newitr |= E1000_EITR_CNT_IGNR;
4005 
4006 	FOREACH_QUEUE(sc, que)
4007 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
4008 
4009 	return (0);
4010 }
4011 
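/*
 * Re-enable this queue's MSI-X interrupt by setting its bit in EIMS.
 */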
4012 void
4013 em_enable_queue_intr_msix(struct em_queue *que)
4014 {
4015 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
4016 }
4017 #endif /* !SMALL_KERNEL */
4018 
4019 int
4020 em_allocate_desc_rings(struct em_softc *sc)
4021 {
4022 	struct em_queue *que;
4023 
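	/*
	 * Allocate DMA-able memory for the TX and RX descriptor rings of
	 * each queue.
	 */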
4024 	FOREACH_QUEUE(sc, que) {
4025 		/* Allocate Transmit Descriptor ring */
4026 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4027 		    &que->tx.sc_tx_dma) != 0) {
4028 			printf("%s: Unable to allocate tx_desc memory\n",
4029 			    DEVNAME(sc));
4030 			return (ENOMEM);
4031 		}
4032 		que->tx.sc_tx_desc_ring =
4033 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4034 
4035 		/* Allocate Receive Descriptor ring */
4036 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4037 		    &que->rx.sc_rx_dma) != 0) {
4038 			printf("%s: Unable to allocate rx_desc memory\n",
4039 			    DEVNAME(sc));
4040 			return (ENOMEM);
4041 		}
4042 		que->rx.sc_rx_desc_ring =
4043 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4044 	}
4045 
4046 	return (0);
4047 }
4048 
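/*
 * Read one SFP module page (EEPROM or diagnostic monitoring) a byte at a
 * time over I2C; only supported on 82575/82576/82580/i210/i350 MACs.
 */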
4049 int
4050 em_get_sffpage(struct em_softc *sc, struct if_sffpage *sff)
4051 {
4052 	struct em_hw *hw = &sc->hw;
4053 	size_t i;
4054 	int off;
4055 
4056 	if (hw->mac_type != em_82575 && hw->mac_type != em_82580 &&
4057 	    hw->mac_type != em_82576 &&
4058 	    hw->mac_type != em_i210 && hw->mac_type != em_i350)
4059 		return (ENODEV);
4060 
4061 	if (sff->sff_addr == IFSFF_ADDR_EEPROM)
4062 		off = E1000_I2CCMD_SFP_DATA_ADDR(0);
4063 	else if (sff->sff_addr == IFSFF_ADDR_DDM)
4064 		off = E1000_I2CCMD_SFP_DIAG_ADDR(0);
4065 	else
4066 		return (EIO);
4067 
4068 	for (i = 0; i < sizeof(sff->sff_data); i++) {
4069 		if (em_read_sfp_data_byte(hw, off + i,
4070 		    &sff->sff_data[i]) != E1000_SUCCESS)
4071 			return (EIO);
4072 	}
4073 
4074 	return (0);
4075 }
4076