1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.363 2022/11/06 18:17:56 mbuhl Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 #include <netinet/ip6.h>
41 
42 /*********************************************************************
43  *  Driver version
44  *********************************************************************/
45 
46 #define EM_DRIVER_VERSION	"6.2.9"
47 
48 /*********************************************************************
49  *  PCI Device ID Table
50  *********************************************************************/
51 const struct pci_matchid em_devices[] = {
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
225 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
226 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
227 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
228 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
229 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
230 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
231 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
232 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
233 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
234 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
235 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
236 };
237 
238 /*********************************************************************
239  *  Function prototypes
240  *********************************************************************/
241 int  em_probe(struct device *, void *, void *);
242 void em_attach(struct device *, struct device *, void *);
243 void em_defer_attach(struct device*);
244 int  em_detach(struct device *, int);
245 int  em_activate(struct device *, int);
246 int  em_intr(void *);
247 int  em_allocate_legacy(struct em_softc *);
248 void em_start(struct ifqueue *);
249 int  em_ioctl(struct ifnet *, u_long, caddr_t);
250 void em_watchdog(struct ifnet *);
251 void em_init(void *);
252 void em_stop(void *, int);
253 void em_media_status(struct ifnet *, struct ifmediareq *);
254 int  em_media_change(struct ifnet *);
255 uint64_t  em_flowstatus(struct em_softc *);
256 void em_identify_hardware(struct em_softc *);
257 int  em_allocate_pci_resources(struct em_softc *);
258 void em_free_pci_resources(struct em_softc *);
259 void em_local_timer(void *);
260 int  em_hardware_init(struct em_softc *);
261 void em_setup_interface(struct em_softc *);
262 int  em_setup_transmit_structures(struct em_softc *);
263 void em_initialize_transmit_unit(struct em_softc *);
264 int  em_setup_receive_structures(struct em_softc *);
265 void em_initialize_receive_unit(struct em_softc *);
266 void em_enable_intr(struct em_softc *);
267 void em_disable_intr(struct em_softc *);
268 void em_free_transmit_structures(struct em_softc *);
269 void em_free_receive_structures(struct em_softc *);
270 void em_update_stats_counters(struct em_softc *);
271 void em_disable_aspm(struct em_softc *);
272 void em_txeof(struct em_queue *);
273 int  em_allocate_receive_structures(struct em_softc *);
274 int  em_allocate_transmit_structures(struct em_softc *);
275 int  em_allocate_desc_rings(struct em_softc *);
276 int  em_rxfill(struct em_queue *);
277 void em_rxrefill(void *);
278 int  em_rxeof(struct em_queue *);
279 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
280 			 struct mbuf *);
281 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
282 	    u_int32_t *, u_int32_t *);
283 u_int	em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
284 	    u_int32_t *);
285 void em_iff(struct em_softc *);
286 void em_update_link_status(struct em_softc *);
287 int  em_get_buf(struct em_queue *, int);
288 void em_enable_hw_vlans(struct em_softc *);
289 u_int em_encap(struct em_queue *, struct mbuf *);
290 void em_smartspeed(struct em_softc *);
291 int  em_82547_fifo_workaround(struct em_softc *, int);
292 void em_82547_update_fifo_head(struct em_softc *, int);
293 int  em_82547_tx_fifo_reset(struct em_softc *);
294 void em_82547_move_tail(void *arg);
295 void em_82547_move_tail_locked(struct em_softc *);
296 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
297 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
298 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
299 			      PDESC_ARRAY desc_array);
300 void em_flush_tx_ring(struct em_queue *);
301 void em_flush_rx_ring(struct em_queue *);
302 void em_flush_desc_rings(struct em_softc *);
303 int em_get_sffpage(struct em_softc *, struct if_sffpage *);
304 
305 #ifndef SMALL_KERNEL
306 /* MSIX/Multiqueue functions */
307 int  em_allocate_msix(struct em_softc *);
308 int  em_setup_queues_msix(struct em_softc *);
309 int  em_queue_intr_msix(void *);
310 int  em_link_intr_msix(void *);
311 void em_enable_queue_intr_msix(struct em_queue *);
312 #else
313 #define em_allocate_msix(_sc) 	(-1)
314 #endif
315 
316 #if NKSTAT > 0
317 void	em_kstat_attach(struct em_softc *);
318 int	em_kstat_read(struct kstat *);
319 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
320 #endif
321 
322 /*********************************************************************
323  *  OpenBSD Device Interface Entry Points
324  *********************************************************************/
325 
326 const struct cfattach em_ca = {
327 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
328 	em_activate
329 };
330 
331 struct cfdriver em_cd = {
332 	NULL, "em", DV_IFNET
333 };
334 
335 static int em_smart_pwr_down = FALSE;
336 int em_enable_msix = 0;
337 
338 /*********************************************************************
339  *  Device identification routine
340  *
341  *  em_probe determines if the driver should be loaded on an
342  *  adapter based on the PCI vendor/device ID of the adapter.
343  *
344  *  Returns 0 on no match, positive on match.
345  *********************************************************************/
346 
347 int
348 em_probe(struct device *parent, void *match, void *aux)
349 {
350 	INIT_DEBUGOUT("em_probe: begin");
351 
352 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
353 	    nitems(em_devices)));
354 }
355 
356 void
357 em_defer_attach(struct device *self)
358 {
359 	struct em_softc *sc = (struct em_softc *)self;
360 	struct pci_attach_args *pa = &sc->osdep.em_pa;
361 	pci_chipset_tag_t	pc = pa->pa_pc;
362 	void *gcu;
363 
364 	INIT_DEBUGOUT("em_defer_attach: begin");
365 
366 	if ((gcu = em_lookup_gcu(self)) == 0) {
367 		printf("%s: No GCU found, deferred attachment failed\n",
368 		    DEVNAME(sc));
369 
370 		if (sc->sc_intrhand)
371 			pci_intr_disestablish(pc, sc->sc_intrhand);
372 		sc->sc_intrhand = 0;
373 
374 		em_stop(sc, 1);
375 
376 		em_free_pci_resources(sc);
377 
378 		return;
379 	}
380 
381 	sc->hw.gcu = gcu;
382 
383 	em_attach_miibus(self);
384 
385 	em_setup_interface(sc);
386 
387 	em_setup_link(&sc->hw);
388 
389 	em_update_link_status(sc);
390 }
391 
392 /*********************************************************************
393  *  Device initialization routine
394  *
395  *  The attach entry point is called when the driver is being loaded.
396  *  This routine identifies the type of hardware, allocates all resources
397  *  and initializes the hardware.
398  *
399  *********************************************************************/
400 
401 void
402 em_attach(struct device *parent, struct device *self, void *aux)
403 {
404 	struct pci_attach_args *pa = aux;
405 	struct em_softc *sc;
406 	int defer = 0;
407 
408 	INIT_DEBUGOUT("em_attach: begin");
409 
410 	sc = (struct em_softc *)self;
411 	sc->sc_dmat = pa->pa_dmat;
412 	sc->osdep.em_pa = *pa;
413 
414 	timeout_set(&sc->timer_handle, em_local_timer, sc);
415 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
416 
417 	rw_init(&sc->sfflock, "emsff");
418 
419 	/* Determine hardware revision */
420 	em_identify_hardware(sc);
421 
422 	/*
423 	 * Only use MSI on the newer PCIe parts, with the exception
424 	 * of 82571/82572 due to the "Byte Enables 2 and 3 Are Not Set" erratum.
425 	 */
426 	if (sc->hw.mac_type <= em_82572)
427 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
428 
429 	/* Parameters (to be read from user) */
430 	if (sc->hw.mac_type >= em_82544) {
431 		sc->sc_tx_slots = EM_MAX_TXD;
432 		sc->sc_rx_slots = EM_MAX_RXD;
433 	} else {
434 		sc->sc_tx_slots = EM_MAX_TXD_82543;
435 		sc->sc_rx_slots = EM_MAX_RXD_82543;
436 	}
437 	sc->tx_int_delay = EM_TIDV;
438 	sc->tx_abs_int_delay = EM_TADV;
439 	sc->rx_int_delay = EM_RDTR;
440 	sc->rx_abs_int_delay = EM_RADV;
441 	sc->hw.autoneg = DO_AUTO_NEG;
442 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
443 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
444 	sc->hw.tbi_compatibility_en = TRUE;
445 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
446 
447 	sc->hw.phy_init_script = 1;
448 	sc->hw.phy_reset_disable = FALSE;
449 
450 #ifndef EM_MASTER_SLAVE
451 	sc->hw.master_slave = em_ms_hw_default;
452 #else
453 	sc->hw.master_slave = EM_MASTER_SLAVE;
454 #endif
455 
456 	/*
457 	 * This controls when hardware reports transmit completion
458 	 * status.
459 	 */
460 	sc->hw.report_tx_early = 1;
461 
462 	if (em_allocate_pci_resources(sc))
463 		goto err_pci;
464 
465 	/* Initialize eeprom parameters */
466 	em_init_eeprom_params(&sc->hw);
467 
468 	/*
469 	 * Set the max frame size assuming standard Ethernet
470 	 * sized frames.
471 	 */
472 	switch (sc->hw.mac_type) {
473 		case em_82573:
474 		{
475 			uint16_t	eeprom_data = 0;
476 
477 			/*
478 			 * 82573 only supports Jumbo frames
479 			 * if ASPM is disabled.
480 			 */
481 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
482 			    1, &eeprom_data);
483 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
484 				sc->hw.max_frame_size = ETHER_MAX_LEN;
485 				break;
486 			}
487 			/* Allow Jumbo frames */
488 			/* FALLTHROUGH */
489 		}
490 		case em_82571:
491 		case em_82572:
492 		case em_82574:
493 		case em_82575:
494 		case em_82576:
495 		case em_82580:
496 		case em_i210:
497 		case em_i350:
498 		case em_ich9lan:
499 		case em_ich10lan:
500 		case em_pch2lan:
501 		case em_pch_lpt:
502 		case em_pch_spt:
503 		case em_pch_cnp:
504 		case em_pch_tgp:
505 		case em_pch_adp:
506 		case em_80003es2lan:
507 			/* 9K Jumbo Frame size */
508 			sc->hw.max_frame_size = 9234;
509 			break;
510 		case em_pchlan:
511 			sc->hw.max_frame_size = 4096;
512 			break;
513 		case em_82542_rev2_0:
514 		case em_82542_rev2_1:
515 		case em_ich8lan:
516 			/* Adapters that do not support Jumbo frames */
517 			sc->hw.max_frame_size = ETHER_MAX_LEN;
518 			break;
519 		default:
520 			sc->hw.max_frame_size =
521 			    MAX_JUMBO_FRAME_SIZE;
522 	}
523 
524 	sc->hw.min_frame_size =
525 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
526 
527 	if (em_allocate_desc_rings(sc) != 0) {
528 		printf("%s: Unable to allocate descriptor ring memory\n",
529 		    DEVNAME(sc));
530 		goto err_pci;
531 	}
532 
533 	/* Initialize the hardware */
534 	if ((defer = em_hardware_init(sc))) {
535 		if (defer == EAGAIN)
536 			config_defer(self, em_defer_attach);
537 		else {
538 			printf("%s: Unable to initialize the hardware\n",
539 			    DEVNAME(sc));
540 			goto err_pci;
541 		}
542 	}
543 
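	/*
	 * Multi-port parts share the PHY and NVM between PCI functions.
	 * Record which function this port is and pick the matching
	 * software/firmware semaphore bit so PHY accesses are arbitrated
	 * correctly against the other ports and the firmware.
	 */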
544 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
545 	    sc->hw.mac_type == em_82576 ||
546 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
547 	    sc->hw.mac_type == em_i350) {
548 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
549 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
550 		    E1000_STATUS_FUNC_SHIFT;
551 
552 		switch (sc->hw.bus_func) {
553 		case 0:
554 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
555 			break;
556 		case 1:
557 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
558 			break;
559 		case 2:
560 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
561 			break;
562 		case 3:
563 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
564 			break;
565 		}
566 	} else {
567 		sc->hw.bus_func = 0;
568 	}
569 
570 	/* Copy the permanent MAC address out of the EEPROM */
571 	if (em_read_mac_addr(&sc->hw) < 0) {
572 		printf("%s: EEPROM read error while reading mac address\n",
573 		       DEVNAME(sc));
574 		goto err_pci;
575 	}
576 
577 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
578 
579 	/* Setup OS specific network interface */
580 	if (!defer)
581 		em_setup_interface(sc);
582 
583 	/* Initialize statistics */
584 	em_clear_hw_cntrs(&sc->hw);
585 #if NKSTAT > 0
586 	em_kstat_attach(sc);
587 #endif
588 	sc->hw.get_link_status = 1;
589 	if (!defer)
590 		em_update_link_status(sc);
591 
592 #ifdef EM_DEBUG
593 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
594 #endif
595 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
596 
597 	/* Indicate SOL/IDER usage */
598 	if (em_check_phy_reset_block(&sc->hw))
599 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
600 		    DEVNAME(sc));
601 
602 	/* Identify 82544 on PCI-X */
603 	em_get_bus_info(&sc->hw);
604 	if (sc->hw.bus_type == em_bus_type_pcix &&
605 	    sc->hw.mac_type == em_82544)
606 		sc->pcix_82544 = TRUE;
607 	else
608 		sc->pcix_82544 = FALSE;
609 
610 	sc->hw.icp_xxxx_is_link_up = FALSE;
611 
612 	INIT_DEBUGOUT("em_attach: end");
613 	return;
614 
615 err_pci:
616 	em_free_pci_resources(sc);
617 }
618 
619 /*********************************************************************
620  *  Transmit entry point
621  *
622  *  em_start is called by the stack to initiate a transmit.
623  *  The driver will remain in this routine as long as there are
624  *  packets to transmit and transmit resources are available.
625  *  If resources are not available, the stack is notified and
626  *  the packet is requeued.
627  **********************************************************************/
628 
629 void
630 em_start(struct ifqueue *ifq)
631 {
632 	struct ifnet *ifp = ifq->ifq_if;
633 	struct em_softc *sc = ifp->if_softc;
634 	u_int head, free, used;
635 	struct mbuf *m;
636 	int post = 0;
637 	struct em_queue *que = sc->queues; /* Use only first queue. */
638 
639 	if (!sc->link_active) {
640 		ifq_purge(ifq);
641 		return;
642 	}
643 
644 	/* calculate free space */
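	/*
	 * head is the producer index (next slot to fill) and tail is the
	 * consumer index that em_txeof() advances as the hardware finishes
	 * descriptors; the slots from head up to tail are free.  Add the
	 * ring size before subtracting so the result is correct when the
	 * indexes have wrapped.
	 */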
645 	head = que->tx.sc_tx_desc_head;
646 	free = que->tx.sc_tx_desc_tail;
647 	if (free <= head)
648 		free += sc->sc_tx_slots;
649 	free -= head;
650 
651 	if (sc->hw.mac_type != em_82547) {
652 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
653 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
654 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
655 	}
656 
657 	for (;;) {
658 		/* use 2 because cksum setup can use an extra slot */
659 		if (EM_MAX_SCATTER + 2 > free) {
660 			ifq_set_oactive(ifq);
661 			break;
662 		}
663 
664 		m = ifq_dequeue(ifq);
665 		if (m == NULL)
666 			break;
667 
668 		used = em_encap(que, m);
669 		if (used == 0) {
670 			m_freem(m);
671 			continue;
672 		}
673 
674 		KASSERT(used <= free);
675 
676 		free -= used;
677 
678 #if NBPFILTER > 0
679 		/* Send a copy of the frame to the BPF listener */
680 		if (ifp->if_bpf)
681 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
682 #endif
683 
684 		/* Set timeout in case hardware has problems transmitting */
685 		ifp->if_timer = EM_TX_TIMEOUT;
686 
687 		if (sc->hw.mac_type == em_82547) {
688 			int len = m->m_pkthdr.len;
689 
690 			if (sc->link_duplex == HALF_DUPLEX)
691 				em_82547_move_tail_locked(sc);
692 			else {
693 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
694 				    que->tx.sc_tx_desc_head);
695 				em_82547_update_fifo_head(sc, len);
696 			}
697 		}
698 
699 		post = 1;
700 	}
701 
702 	if (sc->hw.mac_type != em_82547) {
703 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
704 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
705 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
706 		/*
707 		 * Advance the Transmit Descriptor Tail (Tdt),
708 		 * this tells the E1000 that this frame is
709 		 * available to transmit.
710 		 */
711 		if (post)
712 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
713 			    que->tx.sc_tx_desc_head);
714 	}
715 }
716 
717 /*********************************************************************
718  *  Ioctl entry point
719  *
720  *  em_ioctl is called when the user wants to configure the
721  *  interface.
722  *
723  *  return 0 on success, positive on failure
724  **********************************************************************/
725 
726 int
727 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
728 {
729 	int		error = 0;
730 	struct ifreq   *ifr = (struct ifreq *) data;
731 	struct em_softc *sc = ifp->if_softc;
732 	int s;
733 
734 	s = splnet();
735 
736 	switch (command) {
737 	case SIOCSIFADDR:
738 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
739 			       "Addr)");
740 		if (!(ifp->if_flags & IFF_UP)) {
741 			ifp->if_flags |= IFF_UP;
742 			em_init(sc);
743 		}
744 		break;
745 
746 	case SIOCSIFFLAGS:
747 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
748 		if (ifp->if_flags & IFF_UP) {
749 			if (ifp->if_flags & IFF_RUNNING)
750 				error = ENETRESET;
751 			else
752 				em_init(sc);
753 		} else {
754 			if (ifp->if_flags & IFF_RUNNING)
755 				em_stop(sc, 0);
756 		}
757 		break;
758 
759 	case SIOCSIFMEDIA:
760 		/* Check SOL/IDER usage */
761 		if (em_check_phy_reset_block(&sc->hw)) {
762 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
763 			    DEVNAME(sc));
764 			break;
765 		}
766 	case SIOCGIFMEDIA:
767 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
768 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
769 		break;
770 
771 	case SIOCGIFRXR:
772 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
773 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
774 		break;
775 
776 	case SIOCGIFSFFPAGE:
777 		error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
778 		if (error != 0)
779 			break;
780 
781 		error = em_get_sffpage(sc, (struct if_sffpage *)data);
782 		rw_exit(&sc->sfflock);
783 		break;
784 
785 	default:
786 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
787 	}
788 
789 	if (error == ENETRESET) {
790 		if (ifp->if_flags & IFF_RUNNING) {
791 			em_disable_intr(sc);
792 			em_iff(sc);
793 			if (sc->hw.mac_type == em_82542_rev2_0)
794 				em_initialize_receive_unit(sc);
795 			em_enable_intr(sc);
796 		}
797 		error = 0;
798 	}
799 
800 	splx(s);
801 	return (error);
802 }
803 
804 /*********************************************************************
805  *  Watchdog entry point
806  *
807  *  This routine is called whenever hardware quits transmitting.
808  *
809  **********************************************************************/
810 
811 void
812 em_watchdog(struct ifnet *ifp)
813 {
814 	struct em_softc *sc = ifp->if_softc;
815 	struct em_queue *que = sc->queues; /* Use only first queue. */
816 
817 
818 	/* If we are in this routine because of pause frames, then
819 	 * don't reset the hardware.
820 	 */
821 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
822 		ifp->if_timer = EM_TX_TIMEOUT;
823 		return;
824 	}
825 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
826 	    DEVNAME(sc),
827 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
828 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
829 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
830 
831 	em_init(sc);
832 
833 	sc->watchdog_events++;
834 }
835 
836 /*********************************************************************
837  *  Init entry point
838  *
839  *  This routine is used in two ways. It is used by the stack as
840  *  the init entry point in the network interface structure. It is also used
841  *  by the driver as a hw/sw initialization routine to get to a
842  *  consistent state.
843  *
844  **********************************************************************/
845 
846 void
847 em_init(void *arg)
848 {
849 	struct em_softc *sc = arg;
850 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
851 	uint32_t	pba;
852 	int s;
853 
854 	s = splnet();
855 
856 	INIT_DEBUGOUT("em_init: begin");
857 
858 	em_stop(sc, 0);
859 
860 	/*
861 	 * Packet Buffer Allocation (PBA)
862 	 * Writing PBA sets the receive portion of the buffer;
863 	 * the remainder is used for the transmit buffer.
864 	 *
865 	 * Devices before the 82547 had a Packet Buffer of 64K.
866 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
867 	 * After the 82547 the buffer was reduced to 40K.
868 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
869 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
870 	 */
871 	switch (sc->hw.mac_type) {
872 	case em_82547:
873 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
874 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
875 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
876 		else
877 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
878 		sc->tx_fifo_head = 0;
879 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
880 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
881 		break;
882 	case em_82571:
883 	case em_82572: /* Total Packet Buffer on these is 48k */
884 	case em_82575:
885 	case em_82576:
886 	case em_82580:
887 	case em_80003es2lan:
888 	case em_i350:
889 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
890 		break;
891 	case em_i210:
892 		pba = E1000_PBA_34K;
893 		break;
894 	case em_82573: /* 82573: Total Packet Buffer is 32K */
895 		/* Jumbo frames not supported */
896 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
897 		break;
898 	case em_82574: /* Total Packet Buffer is 40k */
899 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
900 		break;
901 	case em_ich8lan:
902 		pba = E1000_PBA_8K;
903 		break;
904 	case em_ich9lan:
905 	case em_ich10lan:
906 		/* Boost Receive side for jumbo frames */
907 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
908 			pba = E1000_PBA_14K;
909 		else
910 			pba = E1000_PBA_10K;
911 		break;
912 	case em_pchlan:
913 	case em_pch2lan:
914 	case em_pch_lpt:
915 	case em_pch_spt:
916 	case em_pch_cnp:
917 	case em_pch_tgp:
918 	case em_pch_adp:
919 		pba = E1000_PBA_26K;
920 		break;
921 	default:
922 		/* Devices before 82547 had a Packet Buffer of 64K.   */
923 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
924 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
925 		else
926 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
927 	}
928 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
929 	E1000_WRITE_REG(&sc->hw, PBA, pba);
930 
931 	/* Get the latest MAC address; the user may have set a LAA */
932 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
933 
934 	/* Initialize the hardware */
935 	if (em_hardware_init(sc)) {
936 		printf("%s: Unable to initialize the hardware\n",
937 		       DEVNAME(sc));
938 		splx(s);
939 		return;
940 	}
941 	em_update_link_status(sc);
942 
943 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
944 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
945 		em_enable_hw_vlans(sc);
946 
947 	/* Prepare transmit descriptors and buffers */
948 	if (em_setup_transmit_structures(sc)) {
949 		printf("%s: Could not setup transmit structures\n",
950 		       DEVNAME(sc));
951 		em_stop(sc, 0);
952 		splx(s);
953 		return;
954 	}
955 	em_initialize_transmit_unit(sc);
956 
957 	/* Prepare receive descriptors and buffers */
958 	if (em_setup_receive_structures(sc)) {
959 		printf("%s: Could not setup receive structures\n",
960 		       DEVNAME(sc));
961 		em_stop(sc, 0);
962 		splx(s);
963 		return;
964 	}
965 	em_initialize_receive_unit(sc);
966 
967 #ifndef SMALL_KERNEL
968 	if (sc->msix) {
969 		if (em_setup_queues_msix(sc)) {
970 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
971 			splx(s);
972 			return;
973 		}
974 	}
975 #endif
976 
977 	/* Program promiscuous mode and multicast filters. */
978 	em_iff(sc);
979 
980 	ifp->if_flags |= IFF_RUNNING;
981 	ifq_clr_oactive(&ifp->if_snd);
982 
983 	timeout_add_sec(&sc->timer_handle, 1);
984 	em_clear_hw_cntrs(&sc->hw);
985 	em_enable_intr(sc);
986 
987 	/* Don't reset the phy next time init gets called */
988 	sc->hw.phy_reset_disable = TRUE;
989 
990 	splx(s);
991 }
992 
993 /*********************************************************************
994  *
995  *  Interrupt Service routine
996  *
997  **********************************************************************/
998 int
999 em_intr(void *arg)
1000 {
1001 	struct em_softc	*sc = arg;
1002 	struct em_queue *que = sc->queues; /* single queue */
1003 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
1004 	u_int32_t	reg_icr, test_icr;
1005 
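	/*
	 * Reading ICR acknowledges and clears the pending interrupt causes.
	 * On 82571 and later the INT_ASSERTED bit tells us whether this
	 * device actually raised the interrupt, so a shared legacy
	 * interrupt that is not ours can be dismissed early.
	 */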
1006 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
1007 	if (sc->hw.mac_type >= em_82571)
1008 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
1009 	if (!test_icr)
1010 		return (0);
1011 
1012 	if (ifp->if_flags & IFF_RUNNING) {
1013 		em_txeof(que);
1014 		if (em_rxeof(que))
1015 			em_rxrefill(que);
1016 	}
1017 
1018 	/* Link status change */
1019 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1020 		KERNEL_LOCK();
1021 		sc->hw.get_link_status = 1;
1022 		em_check_for_link(&sc->hw);
1023 		em_update_link_status(sc);
1024 		KERNEL_UNLOCK();
1025 	}
1026 
1027 	return (1);
1028 }
1029 
1030 /*********************************************************************
1031  *
1032  *  Media Ioctl callback
1033  *
1034  *  This routine is called whenever the user queries the status of
1035  *  the interface using ifconfig.
1036  *
1037  **********************************************************************/
1038 void
1039 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1040 {
1041 	struct em_softc *sc = ifp->if_softc;
1042 	uint64_t fiber_type = IFM_1000_SX;
1043 	u_int16_t gsr;
1044 
1045 	INIT_DEBUGOUT("em_media_status: begin");
1046 
1047 	em_check_for_link(&sc->hw);
1048 	em_update_link_status(sc);
1049 
1050 	ifmr->ifm_status = IFM_AVALID;
1051 	ifmr->ifm_active = IFM_ETHER;
1052 
1053 	if (!sc->link_active) {
1054 		ifmr->ifm_active |= IFM_NONE;
1055 		return;
1056 	}
1057 
1058 	ifmr->ifm_status |= IFM_ACTIVE;
1059 
1060 	if (sc->hw.media_type == em_media_type_fiber ||
1061 	    sc->hw.media_type == em_media_type_internal_serdes) {
1062 		if (sc->hw.mac_type == em_82545)
1063 			fiber_type = IFM_1000_LX;
1064 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1065 	} else {
1066 		switch (sc->link_speed) {
1067 		case 10:
1068 			ifmr->ifm_active |= IFM_10_T;
1069 			break;
1070 		case 100:
1071 			ifmr->ifm_active |= IFM_100_TX;
1072 			break;
1073 		case 1000:
1074 			ifmr->ifm_active |= IFM_1000_T;
1075 			break;
1076 		}
1077 
1078 		if (sc->link_duplex == FULL_DUPLEX)
1079 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1080 		else
1081 			ifmr->ifm_active |= IFM_HDX;
1082 
1083 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1084 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1085 			if (gsr & SR_1000T_MS_CONFIG_RES)
1086 				ifmr->ifm_active |= IFM_ETH_MASTER;
1087 		}
1088 	}
1089 }
1090 
1091 /*********************************************************************
1092  *
1093  *  Media Ioctl callback
1094  *
1095  *  This routine is called when the user changes speed/duplex using
1096  *  the media/mediaopt options with ifconfig.
1097  *
1098  **********************************************************************/
1099 int
1100 em_media_change(struct ifnet *ifp)
1101 {
1102 	struct em_softc *sc = ifp->if_softc;
1103 	struct ifmedia	*ifm = &sc->media;
1104 
1105 	INIT_DEBUGOUT("em_media_change: begin");
1106 
1107 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1108 		return (EINVAL);
1109 
1110 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1111 	case IFM_AUTO:
1112 		sc->hw.autoneg = DO_AUTO_NEG;
1113 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1114 		break;
1115 	case IFM_1000_LX:
1116 	case IFM_1000_SX:
1117 	case IFM_1000_T:
1118 		sc->hw.autoneg = DO_AUTO_NEG;
1119 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1120 		break;
1121 	case IFM_100_TX:
1122 		sc->hw.autoneg = FALSE;
1123 		sc->hw.autoneg_advertised = 0;
1124 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1125 			sc->hw.forced_speed_duplex = em_100_full;
1126 		else
1127 			sc->hw.forced_speed_duplex = em_100_half;
1128 		break;
1129 	case IFM_10_T:
1130 		sc->hw.autoneg = FALSE;
1131 		sc->hw.autoneg_advertised = 0;
1132 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1133 			sc->hw.forced_speed_duplex = em_10_full;
1134 		else
1135 			sc->hw.forced_speed_duplex = em_10_half;
1136 		break;
1137 	default:
1138 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1139 	}
1140 
1141 	/*
1142 	 * As the speed/duplex settings may have changed we need to
1143 	 * reset the PHY.
1144 	 */
1145 	sc->hw.phy_reset_disable = FALSE;
1146 
1147 	em_init(sc);
1148 
1149 	return (0);
1150 }
1151 
1152 uint64_t
1153 em_flowstatus(struct em_softc *sc)
1154 {
1155 	u_int16_t ar, lpar;
1156 
1157 	if (sc->hw.media_type == em_media_type_fiber ||
1158 	    sc->hw.media_type == em_media_type_internal_serdes)
1159 		return (0);
1160 
1161 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1162 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1163 
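	/*
	 * Resolve the negotiated flow control state from the local and
	 * link partner PAUSE/ASM_DIR advertisements (802.3 Annex 28B):
	 * symmetric pause when both sides advertise PAUSE, otherwise an
	 * asymmetric pause in whichever direction ASM_DIR permits.
	 */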
1164 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1165 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1166 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1167 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1168 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1169 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1170 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1171 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1172 
1173 	return (0);
1174 }
1175 
1176 /*********************************************************************
1177  *
1178  *  This routine maps the mbufs to tx descriptors.
1179  *
1180  *  return 0 on success, positive on failure
1181  **********************************************************************/
1182 u_int
1183 em_encap(struct em_queue *que, struct mbuf *m)
1184 {
1185 	struct em_softc *sc = que->sc;
1186 	struct em_packet *pkt;
1187 	struct em_tx_desc *desc;
1188 	bus_dmamap_t map;
1189 	u_int32_t txd_upper, txd_lower;
1190 	u_int head, last, used = 0;
1191 	int i, j;
1192 
1193 	/* For 82544 Workaround */
1194 	DESC_ARRAY		desc_array;
1195 	u_int32_t		array_elements;
1196 
1197 	/* get a dmamap for this packet from the next free slot */
1198 	head = que->tx.sc_tx_desc_head;
1199 	pkt = &que->tx.sc_tx_pkts_ring[head];
1200 	map = pkt->pkt_map;
1201 
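	/*
	 * Load the mbuf chain into the DMA map.  EFBIG means the chain
	 * has more segments than the map can hold, so compact it with
	 * m_defrag() and retry once before dropping the packet.
	 */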
1202 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1203 	case 0:
1204 		break;
1205 	case EFBIG:
1206 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1207 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1208 		     BUS_DMA_NOWAIT) == 0)
1209 			break;
1210 
1211 		/* FALLTHROUGH */
1212 	default:
1213 		sc->no_tx_dma_setup++;
1214 		return (0);
1215 	}
1216 
1217 	bus_dmamap_sync(sc->sc_dmat, map,
1218 	    0, map->dm_mapsize,
1219 	    BUS_DMASYNC_PREWRITE);
1220 
1221 	if (sc->hw.mac_type == em_82547) {
1222 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1223 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1224 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1225 	}
1226 
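	/*
	 * Set up checksum offload: 82575 through i210 use the advanced
	 * context descriptor format, parts from the 82543 up use the
	 * legacy checksum context, and older chips get no offload.
	 */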
1227 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1228 		used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
1229 	} else if (sc->hw.mac_type >= em_82543) {
1230 		used += em_transmit_checksum_setup(que, m, head,
1231 		    &txd_upper, &txd_lower);
1232 	} else {
1233 		txd_upper = txd_lower = 0;
1234 	}
1235 
1236 	head += used;
1237 	if (head >= sc->sc_tx_slots)
1238 		head -= sc->sc_tx_slots;
1239 
1240 	for (i = 0; i < map->dm_nsegs; i++) {
1241 		/* If sc is 82544 and on PCI-X bus */
1242 		if (sc->pcix_82544) {
1243 			/*
1244 			 * Check the Address and Length combination and
1245 			 * split the data accordingly
1246 			 */
1247 			array_elements = em_fill_descriptors(
1248 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1249 			    &desc_array);
1250 			for (j = 0; j < array_elements; j++) {
1251 				desc = &que->tx.sc_tx_desc_ring[head];
1252 
1253 				desc->buffer_addr = htole64(
1254 					desc_array.descriptor[j].address);
1255 				desc->lower.data = htole32(
1256 					(que->tx.sc_txd_cmd | txd_lower |
1257 					 (u_int16_t)desc_array.descriptor[j].length));
1258 				desc->upper.data = htole32(txd_upper);
1259 
1260 				last = head;
1261 				if (++head == sc->sc_tx_slots)
1262 					head = 0;
1263 
1264 				used++;
1265 			}
1266 		} else {
1267 			desc = &que->tx.sc_tx_desc_ring[head];
1268 
1269 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1270 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1271 			    txd_lower | map->dm_segs[i].ds_len);
1272 			desc->upper.data = htole32(txd_upper);
1273 
1274 			last = head;
1275 			if (++head == sc->sc_tx_slots)
1276 				head = 0;
1277 
1278 			used++;
1279 		}
1280 	}
1281 
1282 #if NVLAN > 0
1283 	/* Find out if we are in VLAN mode */
1284 	if (m->m_flags & M_VLANTAG && (sc->hw.mac_type < em_82575 ||
1285 	    sc->hw.mac_type > em_i210)) {
1286 		/* Set the VLAN id */
1287 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1288 
1289 		/* Tell hardware to add tag */
1290 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1291 	}
1292 #endif
1293 
1294 	/* mark the packet with the mbuf and last desc slot */
1295 	pkt->pkt_m = m;
1296 	pkt->pkt_eop = last;
1297 
1298 	que->tx.sc_tx_desc_head = head;
1299 
1300 	/*
1301 	 * Last Descriptor of Packet
1302 	 * needs End Of Packet (EOP)
1303 	 * and Report Status (RS)
1304 	 */
1305 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1306 
1307 	if (sc->hw.mac_type == em_82547) {
1308 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1309 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1310 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1311 	}
1312 
1313 	return (used);
1314 }
1315 
1316 /*********************************************************************
1317  *
1318  * 82547 workaround to avoid controller hang in half-duplex environment.
1319  * The workaround is to avoid queuing a large packet that would span
1320  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1321  *  in this case. We do that only when the FIFO is quiescent.
1322  *
1323  **********************************************************************/
1324 void
1325 em_82547_move_tail_locked(struct em_softc *sc)
1326 {
1327 	uint16_t hw_tdt;
1328 	uint16_t sw_tdt;
1329 	struct em_tx_desc *tx_desc;
1330 	uint16_t length = 0;
1331 	boolean_t eop = 0;
1332 	struct em_queue *que = sc->queues; /* single queue chip */
1333 
1334 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1335 	sw_tdt = que->tx.sc_tx_desc_head;
1336 
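	/*
	 * Walk the descriptors the hardware has not seen yet (from the
	 * hardware TDT up to our software head), accumulating the length
	 * of each packet.  The hardware tail is only advanced a whole
	 * packet at a time; if a packet would trip the FIFO workaround,
	 * defer it and retry from the timeout.
	 */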
1337 	while (hw_tdt != sw_tdt) {
1338 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1339 		length += tx_desc->lower.flags.length;
1340 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1341 		if (++hw_tdt == sc->sc_tx_slots)
1342 			hw_tdt = 0;
1343 
1344 		if (eop) {
1345 			if (em_82547_fifo_workaround(sc, length)) {
1346 				sc->tx_fifo_wrk_cnt++;
1347 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1348 				break;
1349 			}
1350 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1351 			em_82547_update_fifo_head(sc, length);
1352 			length = 0;
1353 		}
1354 	}
1355 }
1356 
1357 void
1358 em_82547_move_tail(void *arg)
1359 {
1360 	struct em_softc *sc = arg;
1361 	int s;
1362 
1363 	s = splnet();
1364 	em_82547_move_tail_locked(sc);
1365 	splx(s);
1366 }
1367 
1368 int
1369 em_82547_fifo_workaround(struct em_softc *sc, int len)
1370 {
1371 	int fifo_space, fifo_pkt_len;
1372 
1373 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1374 
1375 	if (sc->link_duplex == HALF_DUPLEX) {
1376 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1377 
1378 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1379 			if (em_82547_tx_fifo_reset(sc))
1380 				return (0);
1381 			else
1382 				return (1);
1383 		}
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 void
1390 em_82547_update_fifo_head(struct em_softc *sc, int len)
1391 {
1392 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1393 
1394 	/* tx_fifo_head is always 16 byte aligned */
1395 	sc->tx_fifo_head += fifo_pkt_len;
1396 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1397 		sc->tx_fifo_head -= sc->tx_fifo_size;
1398 }
1399 
1400 int
1401 em_82547_tx_fifo_reset(struct em_softc *sc)
1402 {
1403 	uint32_t tctl;
1404 	struct em_queue *que = sc->queues; /* single queue chip */
1405 
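	/*
	 * Only reset when the transmit path is completely quiescent:
	 * the descriptor ring is empty (TDT == TDH), the internal FIFO
	 * head/tail pointers and their saved copies match, and no packets
	 * remain buffered in the FIFO (TDFPC == 0).
	 */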
1406 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1407 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1408 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1409 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1410 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1411 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1412 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1413 
1414 		/* Disable TX unit */
1415 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1416 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1417 
1418 		/* Reset FIFO pointers */
1419 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1420 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1421 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1422 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1423 
1424 		/* Re-enable TX unit */
1425 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1426 		E1000_WRITE_FLUSH(&sc->hw);
1427 
1428 		sc->tx_fifo_head = 0;
1429 		sc->tx_fifo_reset_cnt++;
1430 
1431 		return (TRUE);
1432 	} else
1433 		return (FALSE);
1434 }
1435 
1436 void
1437 em_iff(struct em_softc *sc)
1438 {
1439 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1440 	struct arpcom *ac = &sc->sc_ac;
1441 	u_int32_t reg_rctl = 0;
1442 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1443 	struct ether_multi *enm;
1444 	struct ether_multistep step;
1445 	int i = 0;
1446 
1447 	IOCTL_DEBUGOUT("em_iff: begin");
1448 
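	/*
	 * On 82542 rev 2.0 the receiver has to be held in reset (and MWI
	 * turned off) while the multicast table array is rewritten; it is
	 * taken back out of reset once the update below is complete.
	 */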
1449 	if (sc->hw.mac_type == em_82542_rev2_0) {
1450 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1451 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1452 			em_pci_clear_mwi(&sc->hw);
1453 		reg_rctl |= E1000_RCTL_RST;
1454 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1455 		msec_delay(5);
1456 	}
1457 
1458 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1459 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1460 	ifp->if_flags &= ~IFF_ALLMULTI;
1461 
1462 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1463 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1464 		ifp->if_flags |= IFF_ALLMULTI;
1465 		reg_rctl |= E1000_RCTL_MPE;
1466 		if (ifp->if_flags & IFF_PROMISC)
1467 			reg_rctl |= E1000_RCTL_UPE;
1468 	} else {
1469 		ETHER_FIRST_MULTI(step, ac, enm);
1470 		while (enm != NULL) {
1471 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1472 			i += ETH_LENGTH_OF_ADDRESS;
1473 
1474 			ETHER_NEXT_MULTI(step, enm);
1475 		}
1476 
1477 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0, 1);
1478 	}
1479 
1480 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1481 
1482 	if (sc->hw.mac_type == em_82542_rev2_0) {
1483 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1484 		reg_rctl &= ~E1000_RCTL_RST;
1485 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1486 		msec_delay(5);
1487 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1488 			em_pci_set_mwi(&sc->hw);
1489 	}
1490 }
1491 
1492 /*********************************************************************
1493  *  Timer routine
1494  *
1495  *  This routine checks for link status and updates statistics.
1496  *
1497  **********************************************************************/
1498 
1499 void
1500 em_local_timer(void *arg)
1501 {
1502 	struct em_softc *sc = arg;
1503 	int s;
1504 
1505 	timeout_add_sec(&sc->timer_handle, 1);
1506 
1507 	s = splnet();
1508 	em_smartspeed(sc);
1509 	splx(s);
1510 
1511 #if NKSTAT > 0
1512 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1513 		em_kstat_read(sc->kstat);
1514 		mtx_leave(&sc->kstat_mtx);
1515 	}
1516 #endif
1517 }
1518 
1519 void
1520 em_update_link_status(struct em_softc *sc)
1521 {
1522 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1523 	u_char link_state;
1524 
1525 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1526 		if (sc->link_active == 0) {
1527 			em_get_speed_and_duplex(&sc->hw,
1528 						&sc->link_speed,
1529 						&sc->link_duplex);
1530 			/* Check if we may set SPEED_MODE bit on PCI-E */
1531 			if ((sc->link_speed == SPEED_1000) &&
1532 			    ((sc->hw.mac_type == em_82571) ||
1533 			    (sc->hw.mac_type == em_82572) ||
1534 			    (sc->hw.mac_type == em_82575) ||
1535 			    (sc->hw.mac_type == em_82576) ||
1536 			    (sc->hw.mac_type == em_82580))) {
1537 				int tarc0;
1538 
1539 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1540 				tarc0 |= SPEED_MODE_BIT;
1541 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1542 			}
1543 			sc->link_active = 1;
1544 			sc->smartspeed = 0;
1545 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1546 		}
1547 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1548 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1549 	} else {
1550 		if (sc->link_active == 1) {
1551 			ifp->if_baudrate = sc->link_speed = 0;
1552 			sc->link_duplex = 0;
1553 			sc->link_active = 0;
1554 		}
1555 		link_state = LINK_STATE_DOWN;
1556 	}
1557 	if (ifp->if_link_state != link_state) {
1558 		ifp->if_link_state = link_state;
1559 		if_link_state_change(ifp);
1560 	}
1561 }
1562 
1563 /*********************************************************************
1564  *
1565  *  This routine disables all traffic on the adapter by issuing a
1566  *  global reset on the MAC and deallocates TX/RX buffers.
1567  *
1568  **********************************************************************/
1569 
1570 void
1571 em_stop(void *arg, int softonly)
1572 {
1573 	struct em_softc *sc = arg;
1574 	struct em_queue *que = sc->queues; /* Use only first queue. */
1575 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1576 
1577 	/* Tell the stack that the interface is no longer active */
1578 	ifp->if_flags &= ~IFF_RUNNING;
1579 
1580 	INIT_DEBUGOUT("em_stop: begin");
1581 
1582 	timeout_del(&que->rx_refill);
1583 	timeout_del(&sc->timer_handle);
1584 	timeout_del(&sc->tx_fifo_timer_handle);
1585 
1586 	if (!softonly)
1587 		em_disable_intr(sc);
1588 	if (sc->hw.mac_type >= em_pch_spt)
1589 		em_flush_desc_rings(sc);
1590 	if (!softonly)
1591 		em_reset_hw(&sc->hw);
1592 
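	/*
	 * Wait for the interrupt handler and any pending transmit work
	 * to finish before the descriptor rings are torn down below.
	 */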
1593 	intr_barrier(sc->sc_intrhand);
1594 	ifq_barrier(&ifp->if_snd);
1595 
1596 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1597 
1598 	ifq_clr_oactive(&ifp->if_snd);
1599 	ifp->if_timer = 0;
1600 
1601 	em_free_transmit_structures(sc);
1602 	em_free_receive_structures(sc);
1603 }
1604 
1605 /*********************************************************************
1606  *
1607  *  Determine hardware revision.
1608  *
1609  **********************************************************************/
1610 void
1611 em_identify_hardware(struct em_softc *sc)
1612 {
1613 	u_int32_t reg;
1614 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1615 
1616 	/* Make sure our PCI config space has the necessary stuff set */
1617 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1618 					    PCI_COMMAND_STATUS_REG);
1619 
1620 	/* Save off the information about this board */
1621 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1622 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1623 
1624 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1625 	sc->hw.revision_id = PCI_REVISION(reg);
1626 
1627 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1628 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1629 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1630 
1631 	/* Identify the MAC */
1632 	if (em_set_mac_type(&sc->hw))
1633 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1634 
1635 	if (sc->hw.mac_type == em_pchlan)
1636 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1637 
1638 	if (sc->hw.mac_type == em_82541 ||
1639 	    sc->hw.mac_type == em_82541_rev_2 ||
1640 	    sc->hw.mac_type == em_82547 ||
1641 	    sc->hw.mac_type == em_82547_rev_2)
1642 		sc->hw.phy_init_script = TRUE;
1643 }
1644 
1645 void
1646 em_legacy_irq_quirk_spt(struct em_softc *sc)
1647 {
1648 	uint32_t	reg;
1649 
1650 	/* Legacy interrupt: SPT needs a quirk. */
1651 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp &&
1652 	    sc->hw.mac_type != em_pch_tgp && sc->hw.mac_type != em_pch_adp)
1653 		return;
1654 	if (sc->legacy_irq == 0)
1655 		return;
1656 
1657 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1658 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1659 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1660 
1661 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1662 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1663 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1664 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1665 }
1666 
1667 int
1668 em_allocate_pci_resources(struct em_softc *sc)
1669 {
1670 	int		val, rid;
1671 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1672 	struct em_queue	       *que = NULL;
1673 
1674 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1675 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1676 		printf(": mmba is not mem space\n");
1677 		return (ENXIO);
1678 	}
1679 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1680 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1681 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1682 		printf(": cannot find mem space\n");
1683 		return (ENXIO);
1684 	}
1685 
1686 	switch (sc->hw.mac_type) {
1687 	case em_82544:
1688 	case em_82540:
1689 	case em_82545:
1690 	case em_82546:
1691 	case em_82541:
1692 	case em_82541_rev_2:
1693 		/* Figure out where our I/O BAR is. */
1694 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1695 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1696 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1697 				sc->io_rid = rid;
1698 				break;
1699 			}
1700 			rid += 4;
1701 			if (PCI_MAPREG_MEM_TYPE(val) ==
1702 			    PCI_MAPREG_MEM_TYPE_64BIT)
1703 				rid += 4;	/* skip high bits, too */
1704 		}
1705 
1706 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1707 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1708 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1709 			printf(": cannot find i/o space\n");
1710 			return (ENXIO);
1711 		}
1712 
1713 		sc->hw.io_base = 0;
1714 		break;
1715 	default:
1716 		break;
1717 	}
1718 
1719 	sc->osdep.em_flashoffset = 0;
1720 	/* for ICH8 and family we need to find the flash memory */
1721 	if (sc->hw.mac_type >= em_pch_spt) {
1722 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1723 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1724 		sc->osdep.em_flashbase = 0;
1725 		sc->osdep.em_flashsize = 0;
1726 		sc->osdep.em_flashoffset = 0xe000;
1727 	} else if (IS_ICH8(sc->hw.mac_type)) {
1728 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1729 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1730 			printf(": flash is not mem space\n");
1731 			return (ENXIO);
1732 		}
1733 
1734 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1735 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1736 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1737 			printf(": cannot find mem space\n");
1738 			return (ENXIO);
1739 		}
1740 	}
1741 
1742 	sc->osdep.dev = (struct device *)sc;
1743 	sc->hw.back = &sc->osdep;
1744 
1745 	/* Only one queue for the moment. */
1746 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1747 	if (que == NULL) {
1748 		printf(": unable to allocate queue memory\n");
1749 		return (ENOMEM);
1750 	}
1751 	que->me = 0;
1752 	que->sc = sc;
1753 	timeout_set(&que->rx_refill, em_rxrefill, que);
1754 
1755 	sc->queues = que;
1756 	sc->num_queues = 1;
1757 	sc->msix = 0;
1758 	sc->legacy_irq = 0;
1759 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1760 		return (ENXIO);
1761 
1762 	/*
1763 	 * the ICP_xxxx device has multiple, duplicate register sets for
1764 	 * use when it is being used as a network processor. Disable those
1765 	 * registers here, as they are not necessary in this context and
1766 	 * can confuse the system
1767 	 */
1768 	if (sc->hw.mac_type == em_icp_xxxx) {
1769 		int offset;
1770 		pcireg_t val;
1771 
1772 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1773 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1774 			return (0);
1775 		}
1776 		offset += PCI_ST_SMIA_OFFSET;
1777 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1778 		    offset, 0x06);
1779 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1780 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1781 	}
1782 	return (0);
1783 }
1784 
1785 void
1786 em_free_pci_resources(struct em_softc *sc)
1787 {
1788 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1789 	pci_chipset_tag_t	pc = pa->pa_pc;
1790 	struct em_queue	       *que = NULL;
1791 	if (sc->sc_intrhand)
1792 		pci_intr_disestablish(pc, sc->sc_intrhand);
1793 	sc->sc_intrhand = 0;
1794 
1795 	if (sc->osdep.em_flashbase)
1796 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1797 				sc->osdep.em_flashsize);
1798 	sc->osdep.em_flashbase = 0;
1799 
1800 	if (sc->osdep.em_iobase)
1801 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1802 				sc->osdep.em_iosize);
1803 	sc->osdep.em_iobase = 0;
1804 
1805 	if (sc->osdep.em_membase)
1806 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1807 				sc->osdep.em_memsize);
1808 	sc->osdep.em_membase = 0;
1809 
1810 	FOREACH_QUEUE(sc, que) {
1811 		if (que->rx.sc_rx_desc_ring != NULL) {
1812 			que->rx.sc_rx_desc_ring = NULL;
1813 			em_dma_free(sc, &que->rx.sc_rx_dma);
1814 		}
1815 		if (que->tx.sc_tx_desc_ring != NULL) {
1816 			que->tx.sc_tx_desc_ring = NULL;
1817 			em_dma_free(sc, &que->tx.sc_tx_dma);
1818 		}
1819 		if (que->tag)
1820 			pci_intr_disestablish(pc, que->tag);
1821 		que->tag = NULL;
1822 		que->eims = 0;
1823 		que->me = 0;
1824 		que->sc = NULL;
1825 	}
1826 	sc->legacy_irq = 0;
1827 	sc->msix_linkvec = 0;
1828 	sc->msix_queuesmask = 0;
1829 	if (sc->queues)
1830 		free(sc->queues, M_DEVBUF,
1831 		    sc->num_queues * sizeof(struct em_queue));
1832 	sc->num_queues = 0;
1833 	sc->queues = NULL;
1834 }
1835 
1836 /*********************************************************************
1837  *
1838  *  Initialize the hardware to a configuration as specified by the
1839  *  em_softc structure. The controller is reset, the EEPROM is
1840  *  verified, the MAC address is set, then the shared initialization
1841  *  routines are called.
1842  *
1843  **********************************************************************/
1844 int
1845 em_hardware_init(struct em_softc *sc)
1846 {
1847 	uint32_t ret_val;
1848 	u_int16_t rx_buffer_size;
1849 
1850 	INIT_DEBUGOUT("em_hardware_init: begin");
1851 	if (sc->hw.mac_type >= em_pch_spt)
1852 		em_flush_desc_rings(sc);
1853 	/* Issue a global reset */
1854 	em_reset_hw(&sc->hw);
1855 
1856 	/* When hardware is reset, fifo_head is also reset */
1857 	sc->tx_fifo_head = 0;
1858 
1859 	/* Make sure we have a good EEPROM before we read from it */
1860 	if (em_get_flash_presence_i210(&sc->hw) &&
1861 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1862 		/*
1863 		 * Some PCIe parts fail the first check due to
1864 		 * the link being in a sleep state.  Call it again;
1865 		 * if it fails a second time, it's a real issue.
1866 		 */
1867 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1868 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1869 			       DEVNAME(sc));
1870 			return (EIO);
1871 		}
1872 	}
1873 
1874 	if (em_get_flash_presence_i210(&sc->hw) &&
1875 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1876 		printf("%s: EEPROM read error while reading part number\n",
1877 		       DEVNAME(sc));
1878 		return (EIO);
1879 	}
1880 
1881 	/* Set up smart power down as default off on newer adapters */
1882 	if (!em_smart_pwr_down &&
1883 	     (sc->hw.mac_type == em_82571 ||
1884 	      sc->hw.mac_type == em_82572 ||
1885 	      sc->hw.mac_type == em_82575 ||
1886 	      sc->hw.mac_type == em_82576 ||
1887 	      sc->hw.mac_type == em_82580 ||
1888 	      sc->hw.mac_type == em_i210 ||
1889 	      sc->hw.mac_type == em_i350 )) {
1890 		uint16_t phy_tmp = 0;
1891 
1892 		/* Speed up time to link by disabling smart power down */
1893 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1894 		phy_tmp &= ~IGP02E1000_PM_SPD;
1895 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1896 	}
1897 
1898 	em_legacy_irq_quirk_spt(sc);
1899 
1900 	/*
1901 	 * These parameters control the automatic generation (Tx) and
1902 	 * response (Rx) to Ethernet PAUSE frames.
1903 	 * - High water mark should allow for at least two frames to be
1904 	 *   received after sending an XOFF.
1905 	 * - Low water mark works best when it is very near the high water mark.
1906 	 *   This allows the receiver to restart by sending XON when it has
1907 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1908 	 *   restart after one full frame is pulled from the buffer.  There
1909 	 *   could be several smaller frames in the buffer and if so they will
1910 	 *   not trigger the XON until their total number reduces the buffer
1911 	 *   by 1500.
1912 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1913 	 */
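	/*
	 * A rough illustration (the actual value depends on how the PBA
	 * register is programmed for this MAC): with a 48 KB RX packet
	 * buffer and a 1518-byte max frame, rx_buffer_size is
	 * 48 * 1024 = 49152, so fc_high_water becomes 49152 - 2048 = 47104
	 * and fc_low_water 47104 - 1500 = 45604 bytes.
	 */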
1914 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1915 
1916 	sc->hw.fc_high_water = rx_buffer_size -
1917 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1918 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1919 	if (sc->hw.mac_type == em_80003es2lan)
1920 		sc->hw.fc_pause_time = 0xFFFF;
1921 	else
1922 		sc->hw.fc_pause_time = 1000;
1923 	sc->hw.fc_send_xon = TRUE;
1924 	sc->hw.fc = E1000_FC_FULL;
1925 
1926 	em_disable_aspm(sc);
1927 
1928 	if ((ret_val = em_init_hw(sc)) != 0) {
1929 		if (ret_val == E1000_DEFER_INIT) {
1930 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1931 			return (EAGAIN);
1932 		}
1933 		printf("\n%s: Hardware Initialization Failed: %d\n",
1934 		       DEVNAME(sc), ret_val);
1935 		return (EIO);
1936 	}
1937 
1938 	em_check_for_link(&sc->hw);
1939 
1940 	return (0);
1941 }
1942 
1943 /*********************************************************************
1944  *
1945  *  Setup networking device structure and register an interface.
1946  *
1947  **********************************************************************/
1948 void
1949 em_setup_interface(struct em_softc *sc)
1950 {
1951 	struct ifnet   *ifp;
1952 	uint64_t fiber_type = IFM_1000_SX;
1953 
1954 	INIT_DEBUGOUT("em_setup_interface: begin");
1955 
1956 	ifp = &sc->sc_ac.ac_if;
1957 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1958 	ifp->if_softc = sc;
1959 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1960 	ifp->if_xflags = IFXF_MPSAFE;
1961 	ifp->if_ioctl = em_ioctl;
1962 	ifp->if_qstart = em_start;
1963 	ifp->if_watchdog = em_watchdog;
1964 	ifp->if_hardmtu =
1965 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1966 	ifq_set_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1967 
1968 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1969 
1970 #if NVLAN > 0
1971 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1972 #endif
1973 
1974 	if (sc->hw.mac_type >= em_82543) {
1975 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1976 	}
1977 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1978 		ifp->if_capabilities |= IFCAP_CSUM_IPv4;
1979 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
1980 	}
1981 
1982 	/*
1983 	 * Specify the media types supported by this adapter and register
1984 	 * callbacks to update media and link information
1985 	 */
1986 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1987 		     em_media_status);
1988 	if (sc->hw.media_type == em_media_type_fiber ||
1989 	    sc->hw.media_type == em_media_type_internal_serdes) {
1990 		if (sc->hw.mac_type == em_82545)
1991 			fiber_type = IFM_1000_LX;
1992 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1993 			    0, NULL);
1994 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1995 			    0, NULL);
1996 	} else {
1997 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1998 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1999 			    0, NULL);
2000 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
2001 			    0, NULL);
2002 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2003 			    0, NULL);
2004 		if (sc->hw.phy_type != em_phy_ife) {
2005 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
2006 				    0, NULL);
2007 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2008 		}
2009 	}
2010 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2011 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
2012 
2013 	if_attach(ifp);
2014 	ether_ifattach(ifp);
2015 	em_enable_intr(sc);
2016 }
2017 
2018 int
2019 em_detach(struct device *self, int flags)
2020 {
2021 	struct em_softc *sc = (struct em_softc *)self;
2022 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2023 	struct pci_attach_args *pa = &sc->osdep.em_pa;
2024 	pci_chipset_tag_t	pc = pa->pa_pc;
2025 
2026 	if (sc->sc_intrhand)
2027 		pci_intr_disestablish(pc, sc->sc_intrhand);
2028 	sc->sc_intrhand = 0;
2029 
2030 	em_stop(sc, 1);
2031 
2032 	em_free_pci_resources(sc);
2033 
2034 	ether_ifdetach(ifp);
2035 	if_detach(ifp);
2036 
2037 	return (0);
2038 }
2039 
2040 int
2041 em_activate(struct device *self, int act)
2042 {
2043 	struct em_softc *sc = (struct em_softc *)self;
2044 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2045 	int rv = 0;
2046 
2047 	switch (act) {
2048 	case DVACT_SUSPEND:
2049 		if (ifp->if_flags & IFF_RUNNING)
2050 			em_stop(sc, 0);
2051 		/* We have no children at the moment, but we will soon. */
2052 		rv = config_activate_children(self, act);
2053 		break;
2054 	case DVACT_RESUME:
2055 		if (ifp->if_flags & IFF_UP)
2056 			em_init(sc);
2057 		break;
2058 	default:
2059 		rv = config_activate_children(self, act);
2060 		break;
2061 	}
2062 	return (rv);
2063 }
2064 
2065 /*********************************************************************
2066  *
2067  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2068  *
2069  **********************************************************************/
2070 void
2071 em_smartspeed(struct em_softc *sc)
2072 {
2073 	uint16_t phy_tmp;
2074 
2075 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2076 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2077 		return;
2078 
2079 	if (sc->smartspeed == 0) {
2080 		/* If Master/Slave config fault is asserted twice,
2081 		 * we assume back-to-back */
2082 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2083 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2084 			return;
2085 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2086 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2087 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2088 					&phy_tmp);
2089 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2090 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2091 				em_write_phy_reg(&sc->hw,
2092 						    PHY_1000T_CTRL, phy_tmp);
2093 				sc->smartspeed++;
2094 				if (sc->hw.autoneg &&
2095 				    !em_phy_setup_autoneg(&sc->hw) &&
2096 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2097 						       &phy_tmp)) {
2098 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2099 						    MII_CR_RESTART_AUTO_NEG);
2100 					em_write_phy_reg(&sc->hw,
2101 							 PHY_CTRL, phy_tmp);
2102 				}
2103 			}
2104 		}
2105 		return;
2106 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2107 		/* If still no link, perhaps using 2/3 pair cable */
2108 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2109 		phy_tmp |= CR_1000T_MS_ENABLE;
2110 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2111 		if (sc->hw.autoneg &&
2112 		    !em_phy_setup_autoneg(&sc->hw) &&
2113 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2114 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2115 				    MII_CR_RESTART_AUTO_NEG);
2116 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2117 		}
2118 	}
2119 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2120 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2121 		sc->smartspeed = 0;
2122 }
2123 
2124 /*
2125  * Manage DMA'able memory.
2126  */
2127 int
2128 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2129 {
2130 	int r;
2131 
2132 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2133 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2134 	if (r != 0)
2135 		return (r);
2136 
2137 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2138 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2139 	if (r != 0)
2140 		goto destroy;
2141 
2142 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2143 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2144 	if (r != 0)
2145 		goto free;
2146 
2147 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2148 	    NULL, BUS_DMA_WAITOK);
2149 	if (r != 0)
2150 		goto unmap;
2151 
2152 	dma->dma_size = size;
2153 	return (0);
2154 
2155 unmap:
2156 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2157 free:
2158 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2159 destroy:
2160 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2161 
2162 	return (r);
2163 }
2164 
2165 void
2166 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2167 {
2168 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2169 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2170 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2171 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2172 }
2173 
2174 /*********************************************************************
2175  *
2176  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2177  *  the information needed to transmit a packet on the wire.
2178  *
2179  **********************************************************************/
2180 int
2181 em_allocate_transmit_structures(struct em_softc *sc)
2182 {
2183 	struct em_queue *que;
2184 
2185 	FOREACH_QUEUE(sc, que) {
2186 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2187 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2188 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2189 
2190 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2191 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2192 		if (que->tx.sc_tx_pkts_ring == NULL) {
2193 			printf("%s: Unable to allocate tx_buffer memory\n",
2194 			    DEVNAME(sc));
2195 			return (ENOMEM);
2196 		}
2197 	}
2198 
2199 	return (0);
2200 }
2201 
2202 /*********************************************************************
2203  *
2204  *  Allocate and initialize transmit structures.
2205  *
2206  **********************************************************************/
2207 int
2208 em_setup_transmit_structures(struct em_softc *sc)
2209 {
2210 	struct em_queue *que;
2211 	struct em_packet *pkt;
2212 	int error, i;
2213 
2214 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2215 		goto fail;
2216 
2217 	FOREACH_QUEUE(sc, que) {
2218 		bzero((void *) que->tx.sc_tx_desc_ring,
2219 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2220 
2221 		for (i = 0; i < sc->sc_tx_slots; i++) {
2222 			pkt = &que->tx.sc_tx_pkts_ring[i];
2223 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2224 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2225 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2226 			if (error != 0) {
2227 				printf("%s: Unable to create TX DMA map\n",
2228 				    DEVNAME(sc));
2229 				goto fail;
2230 			}
2231 		}
2232 
2233 		que->tx.sc_tx_desc_head = 0;
2234 		que->tx.sc_tx_desc_tail = 0;
2235 
2236 		/* Set checksum context */
2237 		que->tx.active_checksum_context = OFFLOAD_NONE;
2238 	}
2239 
2240 	return (0);
2241 
2242 fail:
2243 	em_free_transmit_structures(sc);
2244 	return (error);
2245 }
2246 
2247 /*********************************************************************
2248  *
2249  *  Enable transmit unit.
2250  *
2251  **********************************************************************/
2252 void
2253 em_initialize_transmit_unit(struct em_softc *sc)
2254 {
2255 	u_int32_t	reg_tctl, reg_tipg = 0;
2256 	u_int64_t	bus_addr;
2257 	struct em_queue *que;
2258 
2259 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2260 
2261 	FOREACH_QUEUE(sc, que) {
2262 		/* Setup the Base and Length of the Tx Descriptor Ring */
2263 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2264 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2265 		    sc->sc_tx_slots *
2266 		    sizeof(struct em_tx_desc));
2267 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2268 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2269 
2270 		/* Setup the HW Tx Head and Tail descriptor pointers */
2271 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2272 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2273 
2274 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2275 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2276 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2277 
2278 		/* Set the default values for the Tx Inter Packet Gap timer */
2279 		switch (sc->hw.mac_type) {
2280 		case em_82542_rev2_0:
2281 		case em_82542_rev2_1:
2282 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2283 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2284 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2285 			break;
2286 		case em_80003es2lan:
2287 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2288 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2289 			break;
2290 		default:
2291 			if (sc->hw.media_type == em_media_type_fiber ||
2292 			    sc->hw.media_type == em_media_type_internal_serdes)
2293 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2294 			else
2295 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2296 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2297 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2298 		}
2299 
2300 
2301 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2302 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2303 		if (sc->hw.mac_type >= em_82540)
2304 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2305 
2306 		/* Setup Transmit Descriptor Base Settings */
2307 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2308 
2309 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2310 		    sc->hw.mac_type == em_82576 ||
2311 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2312 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2313 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2314 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2315 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2316 		} else if (sc->tx_int_delay > 0)
2317 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2318 	}
2319 
2320 	/* Program the Transmit Control Register */
2321 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2322 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2323 	if (sc->hw.mac_type >= em_82571)
2324 		reg_tctl |= E1000_TCTL_MULR;
2325 	if (sc->link_duplex == FULL_DUPLEX)
2326 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2327 	else
2328 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2329 	/* This write will effectively turn on the transmit unit */
2330 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2331 
2332 	/* SPT Si errata workaround to avoid data corruption */
2333 
2334 	if (sc->hw.mac_type == em_pch_spt) {
2335 		uint32_t	reg_val;
2336 
2337 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2338 		reg_val |= E1000_RCTL_RDMTS_HEX;
2339 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2340 
2341 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2342 		/* i218-i219 Specification Update 1.5.4.5 */
2343 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2344 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2345 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2346 	}
2347 }
2348 
2349 /*********************************************************************
2350  *
2351  *  Free all transmit related data structures.
2352  *
2353  **********************************************************************/
2354 void
2355 em_free_transmit_structures(struct em_softc *sc)
2356 {
2357 	struct em_queue *que;
2358 	struct em_packet *pkt;
2359 	int i;
2360 
2361 	INIT_DEBUGOUT("free_transmit_structures: begin");
2362 
2363 	FOREACH_QUEUE(sc, que) {
2364 		if (que->tx.sc_tx_pkts_ring != NULL) {
2365 			for (i = 0; i < sc->sc_tx_slots; i++) {
2366 				pkt = &que->tx.sc_tx_pkts_ring[i];
2367 
2368 				if (pkt->pkt_m != NULL) {
2369 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2370 					    0, pkt->pkt_map->dm_mapsize,
2371 					    BUS_DMASYNC_POSTWRITE);
2372 					bus_dmamap_unload(sc->sc_dmat,
2373 					    pkt->pkt_map);
2374 
2375 					m_freem(pkt->pkt_m);
2376 					pkt->pkt_m = NULL;
2377 				}
2378 
2379 				if (pkt->pkt_map != NULL) {
2380 					bus_dmamap_destroy(sc->sc_dmat,
2381 					    pkt->pkt_map);
2382 					pkt->pkt_map = NULL;
2383 				}
2384 			}
2385 
2386 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2387 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2388 			que->tx.sc_tx_pkts_ring = NULL;
2389 		}
2390 
2391 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2392 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2393 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2394 	}
2395 }
2396 
2397 u_int
2398 em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2399     u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
2400 {
2401 	struct e1000_adv_tx_context_desc *TD;
2402 	struct ether_header *eh = mtod(mp, struct ether_header *);
2403 	struct mbuf *m;
2404 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
2405 	int off = 0, hoff;
2406 	uint8_t ipproto, iphlen;
2407 
2408 	*olinfo_status = 0;
2409 	*cmd_type_len = 0;
2410 	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
2411 
2412 #if NVLAN > 0
2413 	if (ISSET(mp->m_flags, M_VLANTAG)) {
2414 		uint16_t vtag = htole16(mp->m_pkthdr.ether_vtag);
2415 		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
2416 		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
2417 		off = 1;
2418 	}
2419 #endif
2420 
2421 	vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT);
2422 
2423 	switch (ntohs(eh->ether_type)) {
2424 	case ETHERTYPE_IP: {
2425 		struct ip *ip;
2426 
2427 		m = m_getptr(mp, sizeof(*eh), &hoff);
2428 		ip = (struct ip *)(mtod(m, caddr_t) + hoff);
2429 
2430 		iphlen = ip->ip_hl << 2;
2431 		ipproto = ip->ip_p;
2432 
2433 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2434 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
2435 			*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2436 			off = 1;
2437 		}
2438 
2439 		break;
2440 	}
2441 #ifdef INET6
2442 	case ETHERTYPE_IPV6: {
2443 		struct ip6_hdr *ip6;
2444 
2445 		m = m_getptr(mp, sizeof(*eh), &hoff);
2446 		ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
2447 
2448 		iphlen = sizeof(*ip6);
2449 		ipproto = ip6->ip6_nxt;
2450 
2451 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2452 		break;
2453 	}
2454 #endif
2455 	default:
2456 		iphlen = 0;
2457 		ipproto = 0;
2458 		break;
2459 	}
2460 
2461 	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
2462 	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
2463 	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
2464 	vlan_macip_lens |= iphlen;
2465 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2466 
2467 	switch (ipproto) {
2468 	case IPPROTO_TCP:
2469 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2470 		if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
2471 			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2472 			off = 1;
2473 		}
2474 		break;
2475 	case IPPROTO_UDP:
2476 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
2477 		if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
2478 			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2479 			off = 1;
2480 		}
2481 		break;
2482 	}
2483 
2484 	if (!off)
2485 		return (0);
2486 
2487 	/* 82575 needs the queue index added */
2488 	if (que->sc->hw.mac_type == em_82575)
2489 		mss_l4len_idx |= (que->me & 0xff) << 4;
2490 
2491 	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
2492 	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
2493 	htolem32(&TD->u.seqnum_seed, 0);
2494 	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
2495 
2496 	return (1);
2497 }
2498 
2499 /*********************************************************************
2500  *
2501  *  The offload context needs to be set when we transfer the first
2502  *  packet of a particular protocol (TCP/UDP). We change the
2503  *  context only if the protocol type changes.
2504  *
2505  **********************************************************************/
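/*
 * For example, a run of TCP segments programs the TCP/IP context once and
 * then reuses it (subsequent calls return 0 and consume no descriptor);
 * the first UDP packet that follows rewrites the context descriptor.
 */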
2506 u_int
2507 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2508     u_int32_t *txd_upper, u_int32_t *txd_lower)
2509 {
2510 	struct em_context_desc *TXD;
2511 
2512 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2513 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2514 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2515 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2516 			return (0);
2517 		else
2518 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2519 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2520 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2521 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2522 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2523 			return (0);
2524 		else
2525 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2526 	} else {
2527 		*txd_upper = 0;
2528 		*txd_lower = 0;
2529 		return (0);
2530 	}
2531 
2532 	/* If we reach this point, the checksum offload context
2533 	 * needs to be reset.
2534 	 */
2535 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2536 
2537 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2538 	TXD->lower_setup.ip_fields.ipcso =
2539 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2540 	TXD->lower_setup.ip_fields.ipcse =
2541 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2542 
2543 	TXD->upper_setup.tcp_fields.tucss =
2544 	    ETHER_HDR_LEN + sizeof(struct ip);
2545 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2546 
2547 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2548 		TXD->upper_setup.tcp_fields.tucso =
2549 		    ETHER_HDR_LEN + sizeof(struct ip) +
2550 		    offsetof(struct tcphdr, th_sum);
2551 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2552 		TXD->upper_setup.tcp_fields.tucso =
2553 		    ETHER_HDR_LEN + sizeof(struct ip) +
2554 		    offsetof(struct udphdr, uh_sum);
2555 	}
2556 
2557 	TXD->tcp_seg_setup.data = htole32(0);
2558 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2559 
2560 	return (1);
2561 }
2562 
2563 /**********************************************************************
2564  *
2565  *  Examine each tx_buffer in the used queue. If the hardware is done
2566  *  processing the packet then free associated resources. The
2567  *  tx_buffer is put back on the free queue.
2568  *
2569  **********************************************************************/
2570 void
2571 em_txeof(struct em_queue *que)
2572 {
2573 	struct em_softc *sc = que->sc;
2574 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2575 	struct em_packet *pkt;
2576 	struct em_tx_desc *desc;
2577 	u_int head, tail;
2578 	u_int free = 0;
2579 
2580 	head = que->tx.sc_tx_desc_head;
2581 	tail = que->tx.sc_tx_desc_tail;
2582 
2583 	if (head == tail)
2584 		return;
2585 
2586 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2587 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2588 	    BUS_DMASYNC_POSTREAD);
2589 
2590 	do {
2591 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2592 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2593 
2594 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2595 			break;
2596 
2597 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2598 		    0, pkt->pkt_map->dm_mapsize,
2599 		    BUS_DMASYNC_POSTWRITE);
2600 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2601 
2602 		KASSERT(pkt->pkt_m != NULL);
2603 
2604 		m_freem(pkt->pkt_m);
2605 		pkt->pkt_m = NULL;
2606 
2607 		tail = pkt->pkt_eop;
2608 
2609 		if (++tail == sc->sc_tx_slots)
2610 			tail = 0;
2611 
2612 		free++;
2613 	} while (tail != head);
2614 
2615 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2616 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2617 	    BUS_DMASYNC_PREREAD);
2618 
2619 	if (free == 0)
2620 		return;
2621 
2622 	que->tx.sc_tx_desc_tail = tail;
2623 
2624 	if (ifq_is_oactive(&ifp->if_snd))
2625 		ifq_restart(&ifp->if_snd);
2626 	else if (tail == head)
2627 		ifp->if_timer = 0;
2628 }
2629 
2630 /*********************************************************************
2631  *
2632  *  Get a buffer from system mbuf buffer pool.
2633  *  Get a buffer from the system mbuf cluster pool.
2634  **********************************************************************/
2635 int
2636 em_get_buf(struct em_queue *que, int i)
2637 {
2638 	struct em_softc *sc = que->sc;
2639 	struct mbuf    *m;
2640 	struct em_packet *pkt;
2641 	struct em_rx_desc *desc;
2642 	int error;
2643 
2644 	pkt = &que->rx.sc_rx_pkts_ring[i];
2645 	desc = &que->rx.sc_rx_desc_ring[i];
2646 
2647 	KASSERT(pkt->pkt_m == NULL);
2648 
2649 	m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2650 	if (m == NULL) {
2651 		sc->mbuf_cluster_failed++;
2652 		return (ENOBUFS);
2653 	}
2654 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
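	/*
	 * Trim ETHER_ALIGN (2) bytes from the front of the cluster so that
	 * the IP header following the 14-byte Ethernet header lands on a
	 * 4-byte boundary.
	 */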
2655 	m_adj(m, ETHER_ALIGN);
2656 
2657 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2658 	    m, BUS_DMA_NOWAIT);
2659 	if (error) {
2660 		m_freem(m);
2661 		return (error);
2662 	}
2663 
2664 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2665 	    0, pkt->pkt_map->dm_mapsize,
2666 	    BUS_DMASYNC_PREREAD);
2667 	pkt->pkt_m = m;
2668 
2669 	memset(desc, 0, sizeof(*desc));
2670 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2671 
2672 	return (0);
2673 }
2674 
2675 /*********************************************************************
2676  *
2677  *  Allocate memory for rx_buffer structures. Since we use one
2678  *  rx_buffer per received packet, the maximum number of rx_buffer's
2679  *  that we'll need is equal to the number of receive descriptors
2680  *  that we've allocated.
2681  *
2682  **********************************************************************/
2683 int
2684 em_allocate_receive_structures(struct em_softc *sc)
2685 {
2686 	struct em_queue *que;
2687 	struct em_packet *pkt;
2688 	int i;
2689 	int error;
2690 
2691 	FOREACH_QUEUE(sc, que) {
2692 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2693 		    sizeof(*que->rx.sc_rx_pkts_ring),
2694 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2695 		if (que->rx.sc_rx_pkts_ring == NULL) {
2696 			printf("%s: Unable to allocate rx_buffer memory\n",
2697 			    DEVNAME(sc));
2698 			return (ENOMEM);
2699 		}
2700 
2701 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2702 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2703 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2704 
2705 		for (i = 0; i < sc->sc_rx_slots; i++) {
2706 			pkt = &que->rx.sc_rx_pkts_ring[i];
2707 
2708 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2709 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2710 			if (error != 0) {
2711 				printf("%s: em_allocate_receive_structures: "
2712 				    "bus_dmamap_create failed; error %u\n",
2713 				    DEVNAME(sc), error);
2714 				goto fail;
2715 			}
2716 
2717 			pkt->pkt_m = NULL;
2718 		}
2719 	}
2720 
2721 	return (0);
2722 
2723 fail:
2724 	em_free_receive_structures(sc);
2725 	return (error);
2726 }
2727 
2728 /*********************************************************************
2729  *
2730  *  Allocate and initialize receive structures.
2731  *
2732  **********************************************************************/
2733 int
2734 em_setup_receive_structures(struct em_softc *sc)
2735 {
2736 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2737 	struct em_queue *que;
2738 	u_int lwm;
2739 
2740 	if (em_allocate_receive_structures(sc))
2741 		return (ENOMEM);
2742 
2743 	FOREACH_QUEUE(sc, que) {
2744 		memset(que->rx.sc_rx_desc_ring, 0,
2745 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2746 
2747 		/* Setup our descriptor pointers */
2748 		que->rx.sc_rx_desc_tail = 0;
2749 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2750 
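		/*
		 * Low water mark for the rx ring: enough clusters for two
		 * maximum-sized frames (each may span hardmtu / MCLBYTES + 1
		 * clusters), but never fewer than 4.
		 */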
2751 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2752 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2753 
2754 		if (em_rxfill(que) == 0) {
2755 			printf("%s: unable to fill any rx descriptors\n",
2756 			    DEVNAME(sc));
2757 			return (ENOMEM);
2758 		}
2759 	}
2760 
2761 	return (0);
2762 }
2763 
2764 /*********************************************************************
2765  *
2766  *  Enable receive unit.
2767  *
2768  **********************************************************************/
2769 void
2770 em_initialize_receive_unit(struct em_softc *sc)
2771 {
2772 	struct em_queue *que;
2773 	u_int32_t	reg_rctl;
2774 	u_int32_t	reg_rxcsum;
2775 	u_int32_t	reg_srrctl;
2776 	u_int64_t	bus_addr;
2777 
2778 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2779 
2780 	/* Make sure receives are disabled while setting up the descriptor ring */
2781 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2782 
2783 	/* Set the Receive Delay Timer Register */
2784 	E1000_WRITE_REG(&sc->hw, RDTR,
2785 			sc->rx_int_delay | E1000_RDT_FPDB);
2786 
2787 	if (sc->hw.mac_type >= em_82540) {
2788 		if (sc->rx_int_delay)
2789 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2790 
2791 		/* Set the interrupt throttling rate.  Value is calculated
2792 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
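		/*
		 * For illustration only: assuming MAX_INTS_PER_SEC is 8000,
		 * DEFAULT_ITR works out to about 488, i.e. an interrupt
		 * interval of 488 * 256 ns = ~125 us.
		 */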
2793 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2794 	}
2795 
2796 	/* Setup the Receive Control Register */
2797 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2798 	    E1000_RCTL_RDMTS_HALF |
2799 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2800 
2801 	if (sc->hw.tbi_compatibility_on == TRUE)
2802 		reg_rctl |= E1000_RCTL_SBP;
2803 
2804 	/*
2805 	 * The i350 has a bug where it always strips the CRC whether
2806 	 * asked to or not.  So ask for stripped CRC here and
2807 	 * cope in rxeof
2808 	 */
2809 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2810 		reg_rctl |= E1000_RCTL_SECRC;
2811 
2812 	switch (sc->sc_rx_buffer_len) {
2813 	default:
2814 	case EM_RXBUFFER_2048:
2815 		reg_rctl |= E1000_RCTL_SZ_2048;
2816 		break;
2817 	case EM_RXBUFFER_4096:
2818 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2819 		break;
2820 	case EM_RXBUFFER_8192:
2821 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2822 		break;
2823 	case EM_RXBUFFER_16384:
2824 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2825 		break;
2826 	}
2827 
2828 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2829 		reg_rctl |= E1000_RCTL_LPE;
2830 
2831 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2832 	if (sc->hw.mac_type >= em_82543) {
2833 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2834 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2835 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2836 	}
2837 
2838 	/*
2839 	 * XXX TEMPORARY WORKAROUND: on some systems with 82573
2840 	 * long latencies are observed, like Lenovo X60.
2841 	 */
2842 	if (sc->hw.mac_type == em_82573)
2843 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2844 
2845 	FOREACH_QUEUE(sc, que) {
2846 		if (sc->num_queues > 1) {
2847 			/*
2848 			 * Disable Drop Enable for every queue; by default it is
2849 			 * enabled for queues > 0.
2850 			 */
2851 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2852 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2853 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2854 		}
2855 
2856 		/* Setup the Base and Length of the Rx Descriptor Ring */
2857 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2858 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2859 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2860 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2861 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2862 
2863 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2864 		    sc->hw.mac_type == em_82576 ||
2865 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2866 			/* 82575/6 need to enable the RX queue */
2867 			uint32_t reg;
2868 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2869 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2870 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2871 		}
2872 	}
2873 
2874 	/* Enable Receives */
2875 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2876 
2877 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2878 	FOREACH_QUEUE(sc, que) {
2879 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2880 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2881 	}
2882 }
2883 
2884 /*********************************************************************
2885  *
2886  *  Free receive related data structures.
2887  *
2888  **********************************************************************/
2889 void
2890 em_free_receive_structures(struct em_softc *sc)
2891 {
2892 	struct em_queue *que;
2893 	struct em_packet *pkt;
2894 	int i;
2895 
2896 	INIT_DEBUGOUT("free_receive_structures: begin");
2897 
2898 	FOREACH_QUEUE(sc, que) {
2899 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2900 
2901 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2902 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2903 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2904 
2905 		if (que->rx.sc_rx_pkts_ring != NULL) {
2906 			for (i = 0; i < sc->sc_rx_slots; i++) {
2907 				pkt = &que->rx.sc_rx_pkts_ring[i];
2908 				if (pkt->pkt_m != NULL) {
2909 					bus_dmamap_sync(sc->sc_dmat,
2910 					    pkt->pkt_map,
2911 					    0, pkt->pkt_map->dm_mapsize,
2912 					    BUS_DMASYNC_POSTREAD);
2913 					bus_dmamap_unload(sc->sc_dmat,
2914 					    pkt->pkt_map);
2915 					m_freem(pkt->pkt_m);
2916 					pkt->pkt_m = NULL;
2917 				}
2918 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2919 			}
2920 
2921 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2922 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2923 			que->rx.sc_rx_pkts_ring = NULL;
2924 		}
2925 
2926 		if (que->rx.fmp != NULL) {
2927 			m_freem(que->rx.fmp);
2928 			que->rx.fmp = NULL;
2929 			que->rx.lmp = NULL;
2930 		}
2931 	}
2932 }
2933 
2934 int
2935 em_rxfill(struct em_queue *que)
2936 {
2937 	struct em_softc *sc = que->sc;
2938 	u_int slots;
2939 	int post = 0;
2940 	int i;
2941 
2942 	i = que->rx.sc_rx_desc_head;
2943 
2944 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2945 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2946 	    BUS_DMASYNC_POSTWRITE);
2947 
2948 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2949 	    slots > 0; slots--) {
2950 		if (++i == sc->sc_rx_slots)
2951 			i = 0;
2952 
2953 		if (em_get_buf(que, i) != 0)
2954 			break;
2955 
2956 		que->rx.sc_rx_desc_head = i;
2957 		post = 1;
2958 	}
2959 
2960 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2961 
2962 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2963 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2964 	    BUS_DMASYNC_PREWRITE);
2965 
2966 	return (post);
2967 }
2968 
2969 void
2970 em_rxrefill(void *arg)
2971 {
2972 	struct em_queue *que = arg;
2973 	struct em_softc *sc = que->sc;
2974 
2975 	if (em_rxfill(que))
2976 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2977 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2978 		timeout_add(&que->rx_refill, 1);
2979 }
2980 
2981 /*********************************************************************
2982  *
2983  *  This routine executes in interrupt context. It replenishes
2984  *  the mbufs in the descriptor ring and sends data which has been
2985  *  DMA'd into host memory to the upper layer.
2986  *
2987  *********************************************************************/
2988 int
2989 em_rxeof(struct em_queue *que)
2990 {
2991 	struct em_softc	    *sc = que->sc;
2992 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2993 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2994 	struct mbuf	    *m;
2995 	u_int8_t	    accept_frame = 0;
2996 	u_int8_t	    eop = 0;
2997 	u_int16_t	    len, desc_len, prev_len_adj;
2998 	int		    i, rv = 0;
2999 
3000 	/* Pointer to the receive descriptor being examined. */
3001 	struct em_rx_desc   *desc;
3002 	struct em_packet    *pkt;
3003 	u_int8_t	    status;
3004 
3005 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
3006 		return (0);
3007 
3008 	i = que->rx.sc_rx_desc_tail;
3009 
3010 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3011 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3012 	    BUS_DMASYNC_POSTREAD);
3013 
3014 	do {
3015 		m = NULL;
3016 
3017 		pkt = &que->rx.sc_rx_pkts_ring[i];
3018 		desc = &que->rx.sc_rx_desc_ring[i];
3019 
3020 		status = desc->status;
3021 		if (!ISSET(status, E1000_RXD_STAT_DD))
3022 			break;
3023 
3024 		/* pull the mbuf off the ring */
3025 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
3026 		    0, pkt->pkt_map->dm_mapsize,
3027 		    BUS_DMASYNC_POSTREAD);
3028 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
3029 		m = pkt->pkt_m;
3030 		pkt->pkt_m = NULL;
3031 
3032 		KASSERT(m != NULL);
3033 
3034 		if_rxr_put(&que->rx.sc_rx_ring, 1);
3035 		rv = 1;
3036 
3037 		accept_frame = 1;
3038 		prev_len_adj = 0;
3039 		desc_len = letoh16(desc->length);
3040 
3041 		if (status & E1000_RXD_STAT_EOP) {
3042 			eop = 1;
3043 			if (desc_len < ETHER_CRC_LEN) {
3044 				len = 0;
3045 				prev_len_adj = ETHER_CRC_LEN - desc_len;
3046 			} else if (sc->hw.mac_type == em_i210 ||
3047 			    sc->hw.mac_type == em_i350)
3048 				len = desc_len;
3049 			else
3050 				len = desc_len - ETHER_CRC_LEN;
3051 		} else {
3052 			eop = 0;
3053 			len = desc_len;
3054 		}
3055 
3056 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3057 			u_int8_t last_byte;
3058 			u_int32_t pkt_len = desc_len;
3059 
3060 			if (que->rx.fmp != NULL)
3061 				pkt_len += que->rx.fmp->m_pkthdr.len;
3062 
3063 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
3064 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
3065 			    pkt_len, last_byte)) {
3066 #if NKSTAT > 0
3067 				em_tbi_adjust_stats(sc,
3068 				    pkt_len, sc->hw.mac_addr);
3069 #endif
3070 				if (len > 0)
3071 					len--;
3072 			} else
3073 				accept_frame = 0;
3074 		}
3075 
3076 		if (accept_frame) {
3077 			/* Assign correct length to the current fragment */
3078 			m->m_len = len;
3079 
3080 			if (que->rx.fmp == NULL) {
3081 				m->m_pkthdr.len = m->m_len;
3082 				que->rx.fmp = m;	 /* Store the first mbuf */
3083 				que->rx.lmp = m;
3084 			} else {
3085 				/* Chain mbuf's together */
3086 				m->m_flags &= ~M_PKTHDR;
3087 				/*
3088 				 * Adjust length of previous mbuf in chain if
3089 				 * we received less than 4 bytes in the last
3090 				 * descriptor.
3091 				 */
3092 				if (prev_len_adj > 0) {
3093 					que->rx.lmp->m_len -= prev_len_adj;
3094 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
3095 				}
3096 				que->rx.lmp->m_next = m;
3097 				que->rx.lmp = m;
3098 				que->rx.fmp->m_pkthdr.len += m->m_len;
3099 			}
3100 
3101 			if (eop) {
3102 				m = que->rx.fmp;
3103 
3104 				em_receive_checksum(sc, desc, m);
3105 #if NVLAN > 0
3106 				if (desc->status & E1000_RXD_STAT_VP) {
3107 					m->m_pkthdr.ether_vtag =
3108 					    letoh16(desc->special);
3109 					m->m_flags |= M_VLANTAG;
3110 				}
3111 #endif
3112 				ml_enqueue(&ml, m);
3113 
3114 				que->rx.fmp = NULL;
3115 				que->rx.lmp = NULL;
3116 			}
3117 		} else {
3118 			que->rx.dropped_pkts++;
3119 
3120 			if (que->rx.fmp != NULL) {
3121 				m_freem(que->rx.fmp);
3122 				que->rx.fmp = NULL;
3123 				que->rx.lmp = NULL;
3124 			}
3125 
3126 			m_freem(m);
3127 		}
3128 
3129 		/* Advance our pointers to the next descriptor. */
3130 		if (++i == sc->sc_rx_slots)
3131 			i = 0;
3132 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3133 
3134 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3135 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3136 	    BUS_DMASYNC_PREREAD);
3137 
3138 	que->rx.sc_rx_desc_tail = i;
3139 
3140 	if (ifiq_input(&ifp->if_rcv, &ml))
3141 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3142 
3143 	return (rv);
3144 }
3145 
3146 /*********************************************************************
3147  *
3148  *  Verify that the hardware indicated that the checksum is valid.
3149  *  Inform the stack about the status of the checksum so that the stack
3150  *  doesn't spend time verifying it.
3151  *
3152  *********************************************************************/
3153 void
3154 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3155     struct mbuf *mp)
3156 {
3157 	/* 82543 or newer only */
3158 	if ((sc->hw.mac_type < em_82543) ||
3159 	    /* Ignore Checksum bit is set */
3160 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3161 		mp->m_pkthdr.csum_flags = 0;
3162 		return;
3163 	}
3164 
3165 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3166 		/* Did it pass? */
3167 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3168 			/* IP Checksum Good */
3169 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3170 
3171 		} else
3172 			mp->m_pkthdr.csum_flags = 0;
3173 	}
3174 
3175 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3176 		/* Did it pass? */
3177 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3178 			mp->m_pkthdr.csum_flags |=
3179 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3180 	}
3181 }
3182 
3183 /*
3184  * This turns on hardware offload of VLAN
3185  * tag insertion and stripping.
3186  */
3187 void
3188 em_enable_hw_vlans(struct em_softc *sc)
3189 {
3190 	uint32_t ctrl;
3191 
3192 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3193 	ctrl |= E1000_CTRL_VME;
3194 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3195 }
3196 
3197 void
3198 em_enable_intr(struct em_softc *sc)
3199 {
3200 	uint32_t mask;
3201 
3202 	if (sc->msix) {
3203 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3204 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3205 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3206 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3207 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3208 	} else
3209 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3210 }
3211 
3212 void
3213 em_disable_intr(struct em_softc *sc)
3214 {
3215 	/*
3216 	 * The first version of the 82542 had an erratum where, when link
3217 	 * was forced, it would stay up even if the cable was disconnected.
3218 	 * Sequence errors were used to detect the disconnect and then
3219 	 * the driver would unforce the link.  This code is in the ISR.
3220 	 * For this to work correctly the Sequence error interrupt had
3221 	 * to be enabled all the time.
3222 	 */
3223 	if (sc->msix) {
3224 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3225 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3226 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3227 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3228 	else
3229 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3230 }
3231 
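/*
 * PCI config space is accessed a 32-bit dword at a time, so em_write_pci_cfg()
 * read-modify-writes the 16-bit half selected by bit 1 of the register offset,
 * and em_read_pci_cfg() extracts the same half.
 */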
3232 void
3233 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3234 {
3235 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3236 	pcireg_t val;
3237 
3238 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3239 	if (reg & 0x2) {
3240 		val &= 0x0000ffff;
3241 		val |= (*value << 16);
3242 	} else {
3243 		val &= 0xffff0000;
3244 		val |= *value;
3245 	}
3246 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3247 }
3248 
3249 void
3250 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3251 {
3252 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3253 	pcireg_t val;
3254 
3255 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3256 	if (reg & 0x2)
3257 		*value = (val >> 16) & 0xffff;
3258 	else
3259 		*value = val & 0xffff;
3260 }
3261 
3262 void
3263 em_pci_set_mwi(struct em_hw *hw)
3264 {
3265 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3266 
3267 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3268 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3269 }
3270 
3271 void
3272 em_pci_clear_mwi(struct em_hw *hw)
3273 {
3274 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3275 
3276 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3277 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3278 }
3279 
3280 /*
3281  * We may eventually really do this, but it's unnecessary
3282  * for now, so we just return unsupported.
3283  */
3284 int32_t
3285 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3286 {
3287 	return -E1000_NOT_IMPLEMENTED;
3288 }
3289 
3290 /*********************************************************************
3291 * 82544 Coexistence issue workaround.
3292 *    There are 2 issues.
3293 *       1. Transmit Hang issue.
3294 *    To detect this issue, the following equation can be used:
3295 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3296 *          If SUM[3:0] is between 1 and 4, we will have this issue.
3297 *
3298 *       2. DAC issue.
3299 *    To detect this issue, the following equation can be used:
3300 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3301 *          If SUM[3:0] is between 9 and c, we will have this issue.
3302 *
3303 *
3304 *    WORKAROUND:
3305 *          Make sure we do not have an ending address as 1,2,3,4 (Hang) or 9,a,b,c (DAC)
3306 *
3307 **********************************************************************/
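/*
 * A quick worked example: a buffer at an address with ADDR[2:0] = 6 and a
 * length whose low nibble is 0xE gives SUM[3:0] = (6 + 0xE) & 0xF = 0x4,
 * which falls in the Hang range, so the buffer is split into a
 * (length - 4)-byte descriptor followed by a 4-byte descriptor.
 */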
3308 u_int32_t
3309 em_fill_descriptors(u_int64_t address, u_int32_t length,
3310     PDESC_ARRAY desc_array)
3311 {
3312 	/* Since the issue is sensitive to both length and address, */
3313 	/* let us first check the address. */
3314 	u_int32_t safe_terminator;
3315 	if (length <= 4) {
3316 		desc_array->descriptor[0].address = address;
3317 		desc_array->descriptor[0].length = length;
3318 		desc_array->elements = 1;
3319 		return desc_array->elements;
3320 	}
3321 	safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3322 	/* If it does not fall between 0x1 and 0x4 or 0x9 and 0xC, one descriptor will do. */
3323 	if (safe_terminator == 0 ||
3324 	    (safe_terminator > 4 &&
3325 	    safe_terminator < 9) ||
3326 	    (safe_terminator > 0xC &&
3327 	    safe_terminator <= 0xF)) {
3328 		desc_array->descriptor[0].address = address;
3329 		desc_array->descriptor[0].length = length;
3330 		desc_array->elements = 1;
3331 		return desc_array->elements;
3332 	}
3333 
3334 	desc_array->descriptor[0].address = address;
3335 	desc_array->descriptor[0].length = length - 4;
3336 	desc_array->descriptor[1].address = address + (length - 4);
3337 	desc_array->descriptor[1].length = 4;
3338 	desc_array->elements = 2;
3339 	return desc_array->elements;
3340 }
3341 
3342 /*
3343  * Disable the L0S and L1 LINK states.
3344  */
3345 void
3346 em_disable_aspm(struct em_softc *sc)
3347 {
3348 	int offset;
3349 	pcireg_t val;
3350 
3351 	switch (sc->hw.mac_type) {
3352 		case em_82571:
3353 		case em_82572:
3354 		case em_82573:
3355 		case em_82574:
3356 			break;
3357 		default:
3358 			return;
3359 	}
3360 
3361 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3362 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3363 		return;
3364 
3365 	/* Disable PCIe Active State Power Management (ASPM). */
3366 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3367 	    offset + PCI_PCIE_LCSR);
3368 
3369 	switch (sc->hw.mac_type) {
3370 		case em_82571:
3371 		case em_82572:
3372 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3373 			break;
3374 		case em_82573:
3375 		case em_82574:
3376 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3377 			    PCI_PCIE_LCSR_ASPM_L1);
3378 			break;
3379 		default:
3380 			break;
3381 	}
3382 
3383 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3384 	    offset + PCI_PCIE_LCSR, val);
3385 }
3386 
3387 /*
3388  * em_flush_tx_ring - remove all descriptors from the tx_ring
3389  *
3390  * We want to clear all pending descriptors from the TX ring.
3391  * Zeroing happens when the HW reads the regs. We assign the ring itself as
3392  * the data of the next descriptor; we don't care about the data since we
3393  * are about to reset the HW.
3394  */
3395 void
3396 em_flush_tx_ring(struct em_queue *que)
3397 {
3398 	struct em_softc		*sc = que->sc;
3399 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3400 	uint16_t		 size = 512;
3401 	struct em_tx_desc	*txd;
3402 
3403 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3404 
3405 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3406 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3407 
3408 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3409 
3410 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3411 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3412 	txd->lower.data = htole32(txd_lower | size);
3413 	txd->upper.data = 0;
3414 
3415 	/* flush descriptors to memory before notifying the HW */
3416 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3417 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3418 
3419 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3420 		que->tx.sc_tx_desc_head = 0;
3421 
3422 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3423 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3424 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3425 	usec_delay(250);
3426 }
3427 
3428 /*
3429  * em_flush_rx_ring - remove all descriptors from the rx_ring
3430  *
3431  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3432  */
3433 void
3434 em_flush_rx_ring(struct em_queue *que)
3435 {
3436 	uint32_t	rctl, rxdctl;
3437 	struct em_softc	*sc = que->sc;
3438 
3439 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3440 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3441 	E1000_WRITE_FLUSH(&sc->hw);
3442 	usec_delay(150);
3443 
3444 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3445 	/* zero the lower 14 bits (prefetch and host thresholds) */
3446 	rxdctl &= 0xffffc000;
3447 	/*
3448 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3449 	 * and make sure the granularity is "descriptors" and not "cache lines"
3450 	 */
3451 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3452 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3453 
3454 	/* momentarily enable the RX ring for the changes to take effect */
3455 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3456 	E1000_WRITE_FLUSH(&sc->hw);
3457 	usec_delay(150);
3458 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3459 }
3460 
3461 /*
3462  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3463  *
3464  * In i219, the descriptor rings must be emptied before resetting the HW
3465  * or before changing the device state to D3 during runtime (runtime PM).
3466  *
3467  * Failure to do this causes the HW to enter a unit hang state which can
3468  * only be released by a PCI reset of the device.
3469  *
3470  */
3471 void
3472 em_flush_desc_rings(struct em_softc *sc)
3473 {
3474 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3475 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3476 	uint32_t		 fextnvm11, tdlen;
3477 	uint16_t		 hang_state;
3478 
3479 	/* First, disable MULR fix in FEXTNVM11 */
3480 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3481 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3482 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3483 
3484 	/* do nothing if we're not in faulty state, or if the queue is empty */
3485 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3486 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3487 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3488 		return;
3489 	em_flush_tx_ring(que);
3490 
3491 	/* recheck, maybe the fault is caused by the rx ring */
3492 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3493 	if (hang_state & FLUSH_DESC_REQUIRED)
3494 		em_flush_rx_ring(que);
3495 }
3496 
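/*
 * Minimal sketch (assumption, not the driver's actual call site): a reset
 * path for an i219-class (em_pch_spt) adapter would flush the rings before
 * touching the hardware, roughly like this:
 */
#if 0
	if (sc->hw.mac_type == em_pch_spt)
		em_flush_desc_rings(sc);
	/* ... proceed with the hardware reset ... */
#endif
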
3497 int
3498 em_allocate_legacy(struct em_softc *sc)
3499 {
3500 	pci_intr_handle_t	 ih;
3501 	const char		*intrstr = NULL;
3502 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3503 	pci_chipset_tag_t	 pc = pa->pa_pc;
3504 
3505 	if (pci_intr_map_msi(pa, &ih)) {
3506 		if (pci_intr_map(pa, &ih)) {
3507 			printf(": couldn't map interrupt\n");
3508 			return (ENXIO);
3509 		}
3510 		sc->legacy_irq = 1;
3511 	}
3512 
3513 	intrstr = pci_intr_string(pc, ih);
3514 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3515 	    em_intr, sc, DEVNAME(sc));
3516 	if (sc->sc_intrhand == NULL) {
3517 		printf(": couldn't establish interrupt");
3518 		if (intrstr != NULL)
3519 			printf(" at %s", intrstr);
3520 		printf("\n");
3521 		return (ENXIO);
3522 	}
3523 	printf(": %s", intrstr);
3524 
3525 	return (0);
3526 }
3527 
3528 #if NKSTAT > 0
3529 /* this is used to look up the array of kstats quickly */
3530 enum em_stat {
3531 	em_stat_crcerrs,
3532 	em_stat_algnerrc,
3533 	em_stat_symerrs,
3534 	em_stat_rxerrc,
3535 	em_stat_mpc,
3536 	em_stat_scc,
3537 	em_stat_ecol,
3538 	em_stat_mcc,
3539 	em_stat_latecol,
3540 	em_stat_colc,
3541 	em_stat_dc,
3542 	em_stat_tncrs,
3543 	em_stat_sec,
3544 	em_stat_cexterr,
3545 	em_stat_rlec,
3546 	em_stat_xonrxc,
3547 	em_stat_xontxc,
3548 	em_stat_xoffrxc,
3549 	em_stat_xofftxc,
3550 	em_stat_fcruc,
3551 	em_stat_prc64,
3552 	em_stat_prc127,
3553 	em_stat_prc255,
3554 	em_stat_prc511,
3555 	em_stat_prc1023,
3556 	em_stat_prc1522,
3557 	em_stat_gprc,
3558 	em_stat_bprc,
3559 	em_stat_mprc,
3560 	em_stat_gptc,
3561 	em_stat_gorc,
3562 	em_stat_gotc,
3563 	em_stat_rnbc,
3564 	em_stat_ruc,
3565 	em_stat_rfc,
3566 	em_stat_roc,
3567 	em_stat_rjc,
3568 	em_stat_mgtprc,
3569 	em_stat_mgtpdc,
3570 	em_stat_mgtptc,
3571 	em_stat_tor,
3572 	em_stat_tot,
3573 	em_stat_tpr,
3574 	em_stat_tpt,
3575 	em_stat_ptc64,
3576 	em_stat_ptc127,
3577 	em_stat_ptc255,
3578 	em_stat_ptc511,
3579 	em_stat_ptc1023,
3580 	em_stat_ptc1522,
3581 	em_stat_mptc,
3582 	em_stat_bptc,
3583 #if 0
3584 	em_stat_tsctc,
3585 	em_stat_tsctf,
3586 #endif
3587 
3588 	em_stat_count,
3589 };
3590 
3591 struct em_counter {
3592 	const char		*name;
3593 	enum kstat_kv_unit	 unit;
3594 	uint32_t		 reg;
3595 };
3596 
3597 static const struct em_counter em_counters[em_stat_count] = {
3598 	[em_stat_crcerrs] =
3599 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3600 	[em_stat_algnerrc] = /* >= em_82543 */
3601 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3602 	[em_stat_symerrs] = /* >= em_82543 */
3603 	    { "rx symbol errs",	KSTAT_KV_U_PACKETS,	0 },
3604 	[em_stat_rxerrc] =
3605 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3606 	[em_stat_mpc] =
3607 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3608 	[em_stat_scc] =
3609 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3610 	[em_stat_ecol] =
3611 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3612 	[em_stat_mcc] =
3613 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3614 	[em_stat_latecol] =
3615 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3616 	[em_stat_colc] =
3617 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3618 	[em_stat_dc] =
3619 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3620 	[em_stat_tncrs] = /* >= em_82543 */
3621 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3622 	[em_stat_sec] =
3623 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3624 	[em_stat_cexterr] = /* >= em_82543 */
3625 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3626 	[em_stat_rlec] =
3627 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3628 	[em_stat_xonrxc] =
3629 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3630 	[em_stat_xontxc] =
3631 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3632 	[em_stat_xoffrxc] =
3633 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3634 	[em_stat_xofftxc] =
3635 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3636 	[em_stat_fcruc] =
3637 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3638 	[em_stat_prc64] =
3639 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3640 	[em_stat_prc127] =
3641 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3642 	[em_stat_prc255] =
3643 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3644 	[em_stat_prc511] =
3645 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3646 	[em_stat_prc1023] =
3647 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3648 	[em_stat_prc1522] =
3649 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3650 	[em_stat_gprc] =
3651 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3652 	[em_stat_bprc] =
3653 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3654 	[em_stat_mprc] =
3655 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3656 	[em_stat_gptc] =
3657 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3658 	[em_stat_gorc] = /* 64bit */
3659 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3660 	[em_stat_gotc] = /* 64bit */
3661 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3662 	[em_stat_rnbc] =
3663 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3664 	[em_stat_ruc] =
3665 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3666 	[em_stat_rfc] =
3667 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3668 	[em_stat_roc] =
3669 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3670 	[em_stat_rjc] =
3671 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3672 	[em_stat_mgtprc] =
3673 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3674 	[em_stat_mgtpdc] =
3675 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3676 	[em_stat_mgtptc] =
3677 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3678 	[em_stat_tor] = /* 64bit */
3679 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3680 	[em_stat_tot] = /* 64bit */
3681 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3682 	[em_stat_tpr] =
3683 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3684 	[em_stat_tpt] =
3685 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3686 	[em_stat_ptc64] =
3687 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3688 	[em_stat_ptc127] =
3689 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3690 	[em_stat_ptc255] =
3691 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3692 	[em_stat_ptc511] =
3693 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3694 	[em_stat_ptc1023] =
3695 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3696 	[em_stat_ptc1522] =
3697 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3698 	[em_stat_mptc] =
3699 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3700 	[em_stat_bptc] =
3701 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3702 };
3703 
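/*
 * Illustrative note: the em_stat enum indexes both em_counters[] and the
 * kstat_kv array allocated in em_kstat_attach(), so a counter can be read
 * by index without a name lookup, e.g. (sketch only):
 */
#if 0
	uint64_t crc_errors = kstat_kv_u64(&kvs[em_stat_crcerrs]);
#endif
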
3704 /**********************************************************************
3705  *
3706  *  Update the board statistics counters.
3707  *
3708  **********************************************************************/
3709 int
3710 em_kstat_read(struct kstat *ks)
3711 {
3712 	struct em_softc *sc = ks->ks_softc;
3713 	struct em_hw *hw = &sc->hw;
3714 	struct kstat_kv *kvs = ks->ks_data;
3715 	uint32_t lo, hi;
3716 	unsigned int i;
3717 
3718 	for (i = 0; i < nitems(em_counters); i++) {
3719 		const struct em_counter *c = &em_counters[i];
3720 		if (c->reg == 0)
3721 			continue;
3722 
3723 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3724 		    E1000_REG_TR(hw, c->reg)); /* wtf */
3725 	}
3726 
3727 	/* Handle the exceptions. */
3728 
3729 	if (sc->hw.mac_type >= em_82543) {
3730 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3731 		    E1000_READ_REG(hw, ALGNERRC);
3732 		kstat_kv_u64(&kvs[em_stat_rxerrc]) +=
3733 		    E1000_READ_REG(hw, RXERRC);
3734 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3735 		    E1000_READ_REG(hw, CEXTERR);
3736 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3737 		    E1000_READ_REG(hw, TNCRS);
3738 #if 0
3739 		sc->stats.tsctc +=
3740 		E1000_READ_REG(hw, TSCTC);
3741 		sc->stats.tsctfc +=
3742 		E1000_READ_REG(hw, TSCTFC);
3743 #endif
3744 	}
3745 
3746 	/* For the 64-bit byte counters the low dword must be read first. */
3747 	/* Both registers clear on the read of the high dword */
3748 
3749 	lo = E1000_READ_REG(hw, GORCL);
3750 	hi = E1000_READ_REG(hw, GORCH);
3751 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3752 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3753 
3754 	lo = E1000_READ_REG(hw, GOTCL);
3755 	hi = E1000_READ_REG(hw, GOTCH);
3756 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3757 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3758 
3759 	lo = E1000_READ_REG(hw, TORL);
3760 	hi = E1000_READ_REG(hw, TORH);
3761 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3762 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3763 
3764 	lo = E1000_READ_REG(hw, TOTL);
3765 	hi = E1000_READ_REG(hw, TOTH);
3766 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3767 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3768 
3769 	getnanouptime(&ks->ks_updated);
3770 
3771 	return (0);
3772 }
3773 
3774 void
3775 em_kstat_attach(struct em_softc *sc)
3776 {
3777 	struct kstat *ks;
3778 	struct kstat_kv *kvs;
3779 	unsigned int i;
3780 
3781 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3782 
3783 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3784 	    KSTAT_T_KV, 0);
3785 	if (ks == NULL)
3786 		return;
3787 
3788 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3789 	    M_DEVBUF, M_WAITOK|M_ZERO);
3790 	for (i = 0; i < nitems(em_counters); i++) {
3791 		const struct em_counter *c = &em_counters[i];
3792 		kstat_kv_unit_init(&kvs[i], c->name,
3793 		    KSTAT_KV_T_COUNTER64, c->unit);
3794 	}
3795 
3796 	ks->ks_softc = sc;
3797 	ks->ks_data = kvs;
3798 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3799 	ks->ks_read = em_kstat_read;
3800 	kstat_set_mutex(ks, &sc->kstat_mtx);
3801 
3802 	kstat_install(ks);
3803 }
3804 
3805 /******************************************************************************
3806  * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3807  *****************************************************************************/
3808 void
3809 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3810 {
3811 	struct em_hw *hw = &sc->hw;
3812 	struct kstat *ks = sc->kstat;
3813 	struct kstat_kv *kvs;
3814 
3815 	if (ks == NULL)
3816 		return;
3817 
3818 	/* First adjust the frame length. */
3819 	frame_len--;
3820 
3821 	mtx_enter(&sc->kstat_mtx);
3822 	kvs = ks->ks_data;
3823 
3824 	/*
3825 	 * We need to adjust the statistics counters, since the hardware
3826 	 * counters overcount this packet as a CRC error and undercount the
3827 	 * packet as a good packet
3828 	 */
3829 
3830 	/* This packet should not be counted as a CRC error.	*/
3831 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3832 	/* This packet does count as a Good Packet Received.	*/
3833 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3834 
3835 	/* Adjust the Good Octets received counters		*/
3836 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3837 
3838 	/*
3839 	 * Is this a broadcast or multicast?  Check broadcast first, since
3840 	 * the test for a multicast frame will test positive on a broadcast
3841 	 * frame.
3842 	 */
3843 	if (ETHER_IS_BROADCAST(mac_addr)) {
3844 		/* Broadcast packet */
3845 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3846 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3847 		/* Multicast packet */
3848 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3849 	}
3850 
3851 	if (frame_len == hw->max_frame_size) {
3852 		/*
3853 		 * In this case, the hardware has overcounted the number of
3854 		 * oversize frames.
3855 		 */
3856 		kstat_kv_u64(&kvs[em_stat_roc])--;
3857 	}
3858 
3859 	/*
3860 	 * Adjust the bin counters when the extra byte put the frame in the
3861 	 * wrong bin. Remember that the frame_len was adjusted above.
3862 	 */
3863 	if (frame_len == 64) {
3864 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3865 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3866 	} else if (frame_len == 127) {
3867 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3868 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3869 	} else if (frame_len == 255) {
3870 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3871 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3872 	} else if (frame_len == 511) {
3873 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3874 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3875 	} else if (frame_len == 1023) {
3876 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3877 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3878 	} else if (frame_len == 1522) {
3879 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3880 	}
3881 
3882 	mtx_leave(&sc->kstat_mtx);
3883 }
3884 #endif /* NKSTAT > 0 */
3885 
3886 #ifndef SMALL_KERNEL
3887 int
3888 em_allocate_msix(struct em_softc *sc)
3889 {
3890 	pci_intr_handle_t	 ih;
3891 	const char		*intrstr = NULL;
3892 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3893 	pci_chipset_tag_t	 pc = pa->pa_pc;
3894 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3895 	int			 vec;
3896 
3897 	if (!em_enable_msix)
3898 		return (ENODEV);
3899 
3900 	switch (sc->hw.mac_type) {
3901 	case em_82576:
3902 	case em_82580:
3903 	case em_i350:
3904 	case em_i210:
3905 		break;
3906 	default:
3907 		return (ENODEV);
3908 	}
3909 
3910 	vec = 0;
3911 	if (pci_intr_map_msix(pa, vec, &ih))
3912 		return (ENODEV);
3913 	sc->msix = 1;
3914 
3915 	que->me = vec;
3916 	que->eims = 1 << vec;
3917 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3918 
3919 	intrstr = pci_intr_string(pc, ih);
3920 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3921 	    em_queue_intr_msix, que, que->name);
3922 	if (que->tag == NULL) {
3923 		printf(": couldn't establish interrupt");
3924 		if (intrstr != NULL)
3925 			printf(" at %s", intrstr);
3926 		printf("\n");
3927 		return (ENXIO);
3928 	}
3929 
3930 	/* Set up the link vector; use the last queue vector + 1 */
3931 	vec++;
3932 	sc->msix_linkvec = vec;
3933 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3934 		printf(": couldn't map link vector\n");
3935 		return (ENXIO);
3936 	}
3937 
3938 	intrstr = pci_intr_string(pc, ih);
3939 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3940 	    em_link_intr_msix, sc, DEVNAME(sc));
3941 	if (sc->sc_intrhand == NULL) {
3942 		printf(": couldn't establish interrupt");
3943 		if (intrstr != NULL)
3944 			printf(" at %s", intrstr);
3945 		printf("\n");
3946 		return (ENXIO);
3947 	}
3948 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3949 
3950 	return (0);
3951 }
3952 
3953 /*
3954  * Interrupt for a specific queue (not link interrupts). The EICR bit that
3955  * maps to the EIMS bit covers both RX and TX, so we can't distinguish an
3956  * RX completion from a TX completion and must handle both.
3957  * The bits in EICR are autocleared and we _cannot_ read EICR.
3958  */
3959 int
3960 em_queue_intr_msix(void *vque)
3961 {
3962 	struct em_queue *que = vque;
3963 	struct em_softc *sc = que->sc;
3964 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3965 
3966 	if (ifp->if_flags & IFF_RUNNING) {
3967 		em_txeof(que);
3968 		if (em_rxeof(que))
3969 			em_rxrefill(que);
3970 	}
3971 
3972 	em_enable_queue_intr_msix(que);
3973 
3974 	return (1);
3975 }
3976 
3977 int
3978 em_link_intr_msix(void *arg)
3979 {
3980 	struct em_softc *sc = arg;
3981 	uint32_t icr;
3982 
3983 	icr = E1000_READ_REG(&sc->hw, ICR);
3984 
3985 	/* Link status change */
3986 	if (icr & E1000_ICR_LSC) {
3987 		KERNEL_LOCK();
3988 		sc->hw.get_link_status = 1;
3989 		em_check_for_link(&sc->hw);
3990 		em_update_link_status(sc);
3991 		KERNEL_UNLOCK();
3992 	}
3993 
3994 	/* Re-arm unconditionally */
3995 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3996 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3997 
3998 	return (1);
3999 }
4000 
4001 /*
4002  * Maps queues into msix interrupt vectors.
4003  */
4004 int
4005 em_setup_queues_msix(struct em_softc *sc)
4006 {
4007 	uint32_t ivar, newitr, index;
4008 	struct em_queue *que;
4009 
4010 	KASSERT(sc->msix);
4011 
4012 	/* First, switch the hardware into MSI-X mode via GPIE (not on 82575) */
4013 	if (sc->hw.mac_type != em_82575)
4014 		E1000_WRITE_REG(&sc->hw, GPIE,
4015 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
4016 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
4017 
4018 	/* Map queue and link interrupts into MSI-X vectors */
4019 	switch (sc->hw.mac_type) {
4020 	case em_82580:
4021 	case em_i350:
4022 	case em_i210:
4023 		/* RX entries */
4024 		/*
4025 		 * This maps queues into MSI-X vectors.  The offset calculation
4026 		 * and the even/odd check on que->me follow the IVAR register
4027 		 * layout, where each register holds entries for two queues;
4028 		 * see the datasheet for the exact distribution.
4029 		 */
4030 		FOREACH_QUEUE(sc, que) {
4031 			index = que->me >> 1;
4032 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4033 			if (que->me & 1) {
4034 				ivar &= 0xFF00FFFF;
4035 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
4036 			} else {
4037 				ivar &= 0xFFFFFF00;
4038 				ivar |= que->me | E1000_IVAR_VALID;
4039 			}
4040 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4041 		}
4042 
4043 		/* TX entries */
4044 		FOREACH_QUEUE(sc, que) {
4045 			index = que->me >> 1;
4046 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4047 			if (que->me & 1) {
4048 				ivar &= 0x00FFFFFF;
4049 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
4050 			} else {
4051 				ivar &= 0xFFFF00FF;
4052 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
4053 			}
4054 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4055 			sc->msix_queuesmask |= que->eims;
4056 		}
4057 
4058 		/* And for the link interrupt */
4059 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4060 		sc->msix_linkmask = 1 << sc->msix_linkvec;
4061 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4062 		break;
4063 	case em_82576:
4064 		/* RX entries */
4065 		FOREACH_QUEUE(sc, que) {
4066 			index = que->me & 0x7; /* Each IVAR has two entries */
4067 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4068 			if (que->me < 8) {
4069 				ivar &= 0xFFFFFF00;
4070 				ivar |= que->me | E1000_IVAR_VALID;
4071 			} else {
4072 				ivar &= 0xFF00FFFF;
4073 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
4074 			}
4075 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4076 			sc->msix_queuesmask |= que->eims;
4077 		}
4078 		/* TX entries */
4079 		FOREACH_QUEUE(sc, que) {
4080 			index = que->me & 0x7; /* Each IVAR has two entries */
4081 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4082 			if (que->me < 8) {
4083 				ivar &= 0xFFFF00FF;
4084 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
4085 			} else {
4086 				ivar &= 0x00FFFFFF;
4087 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
4088 			}
4089 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4090 			sc->msix_queuesmask |= que->eims;
4091 		}
4092 
4093 		/* And for the link interrupt */
4094 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4095 		sc->msix_linkmask = 1 << sc->msix_linkvec;
4096 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4097 		break;
4098 	default:
4099 		panic("unsupported mac");
4100 		break;
4101 	}
4102 
4103 	/* Set the starting interrupt rate */
4104 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
4105 
4106 	if (sc->hw.mac_type == em_82575)
4107 		newitr |= newitr << 16;
4108 	else
4109 		newitr |= E1000_EITR_CNT_IGNR;
4110 
4111 	FOREACH_QUEUE(sc, que)
4112 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
4113 
4114 	return (0);
4115 }
4116 
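/*
 * Illustrative sketch, not compiled: the IVAR packing used above for the
 * 82580/i350/i210 case.  Each 32-bit IVAR register carries entries for two
 * queues (RX in bytes 0 and 2, TX in bytes 1 and 3), which is what the
 * even/odd test on que->me selects.  The queue number is a made-up example.
 */
#if 0
	uint32_t me = 3;			/* hypothetical queue number */
	uint32_t index = me >> 1;		/* IVAR register 1 */
	uint32_t rx_shift = (me & 1) ? 16 : 0;	/* RX entry byte lane */
	uint32_t tx_shift = (me & 1) ? 24 : 8;	/* TX entry byte lane */
	uint32_t rx_entry = (me | E1000_IVAR_VALID) << rx_shift;
	uint32_t tx_entry = (me | E1000_IVAR_VALID) << tx_shift;
#endif
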
4117 void
4118 em_enable_queue_intr_msix(struct em_queue *que)
4119 {
4120 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
4121 }
4122 #endif /* !SMALL_KERNEL */
4123 
4124 int
4125 em_allocate_desc_rings(struct em_softc *sc)
4126 {
4127 	struct em_queue *que;
4128 
4129 	FOREACH_QUEUE(sc, que) {
4130 		/* Allocate Transmit Descriptor ring */
4131 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4132 		    &que->tx.sc_tx_dma) != 0) {
4133 			printf("%s: Unable to allocate tx_desc memory\n",
4134 			    DEVNAME(sc));
4135 			return (ENOMEM);
4136 		}
4137 		que->tx.sc_tx_desc_ring =
4138 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4139 
4140 		/* Allocate Receive Descriptor ring */
4141 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4142 		    &que->rx.sc_rx_dma) != 0) {
4143 			printf("%s: Unable to allocate rx_desc memory\n",
4144 			    DEVNAME(sc));
4145 			return (ENOMEM);
4146 		}
4147 		que->rx.sc_rx_desc_ring =
4148 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4149 	}
4150 
4151 	return (0);
4152 }
4153 
4154 int
4155 em_get_sffpage(struct em_softc *sc, struct if_sffpage *sff)
4156 {
4157 	struct em_hw *hw = &sc->hw;
4158 	size_t i;
4159 	int off;
4160 
4161 	if (hw->mac_type != em_82575 && hw->mac_type != em_82580 &&
4162 	    hw->mac_type != em_82576 &&
4163 	    hw->mac_type != em_i210 && hw->mac_type != em_i350)
4164 		return (ENODEV);
4165 
4166 	if (sff->sff_addr == IFSFF_ADDR_EEPROM)
4167 		off = E1000_I2CCMD_SFP_DATA_ADDR(0);
4168 	else if (sff->sff_addr == IFSFF_ADDR_DDM)
4169 		off = E1000_I2CCMD_SFP_DIAG_ADDR(0);
4170 	else
4171 		return (EIO);
4172 
4173 	for (i = 0; i < sizeof(sff->sff_data); i++) {
4174 		if (em_read_sfp_data_byte(hw, off + i,
4175 		    &sff->sff_data[i]) != E1000_SUCCESS)
4176 			return (EIO);
4177 	}
4178 
4179 	return (0);
4180 }
4181
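/*
 * Minimal user-space sketch under assumptions (SIOCGIFSFFPAGE and struct
 * if_sffpage as declared in <net/if.h>; "em0" is a hypothetical interface
 * name) of how the SFF page served above can be requested:
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <string.h>
#include <unistd.h>

int
read_sff_eeprom(int s, struct if_sffpage *sff)
{
	memset(sff, 0, sizeof(*sff));
	strlcpy(sff->sff_ifname, "em0", sizeof(sff->sff_ifname));
	sff->sff_addr = IFSFF_ADDR_EEPROM;	/* i2c address 0xa0 */
	return (ioctl(s, SIOCGIFSFFPAGE, sff));
}
#endif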