xref: /dflybsd-src/sys/dev/netif/bnx/if_bnx.c (revision ca86d83e7d8d6bfef814ef3683c37d99ad62f11c)
1 /*
2  * Copyright (c) 2001 Wind River Systems
3  * Copyright (c) 1997, 1998, 1999, 2001
4  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by Bill Paul.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * $FreeBSD: src/sys/dev/bge/if_bge.c,v 1.3.2.39 2005/07/03 03:41:18 silby Exp $
34  */
35 
36 #include "opt_bnx.h"
37 #include "opt_ifpoll.h"
38 
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/endian.h>
42 #include <sys/kernel.h>
43 #include <sys/interrupt.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/rman.h>
48 #include <sys/serialize.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 
53 #include <netinet/ip.h>
54 #include <netinet/tcp.h>
55 
56 #include <net/bpf.h>
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_poll.h>
63 #include <net/if_types.h>
64 #include <net/ifq_var.h>
65 #include <net/vlan/if_vlan_var.h>
66 #include <net/vlan/if_vlan_ether.h>
67 
68 #include <dev/netif/mii_layer/mii.h>
69 #include <dev/netif/mii_layer/miivar.h>
70 #include <dev/netif/mii_layer/brgphyreg.h>
71 
72 #include <bus/pci/pcidevs.h>
73 #include <bus/pci/pcireg.h>
74 #include <bus/pci/pcivar.h>
75 
76 #include <dev/netif/bge/if_bgereg.h>
77 #include <dev/netif/bnx/if_bnxvar.h>
78 
79 /* "device miibus" required.  See GENERIC if you get errors here. */
80 #include "miibus_if.h"
81 
82 #define BNX_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
83 
84 #define BNX_INTR_CKINTVL	((10 * hz) / 1000)	/* 10ms */
85 
86 static const struct bnx_type {
87 	uint16_t		bnx_vid;
88 	uint16_t		bnx_did;
89 	char			*bnx_name;
90 } bnx_devs[] = {
91 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717,
92 		"Broadcom BCM5717 Gigabit Ethernet" },
93 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717C,
94 		"Broadcom BCM5717C Gigabit Ethernet" },
95 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5718,
96 		"Broadcom BCM5718 Gigabit Ethernet" },
97 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5719,
98 		"Broadcom BCM5719 Gigabit Ethernet" },
99 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5720_ALT,
100 		"Broadcom BCM5720 Gigabit Ethernet" },
101 
102 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5725,
103 		"Broadcom BCM5725 Gigabit Ethernet" },
104 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5727,
105 		"Broadcom BCM5727 Gigabit Ethernet" },
106 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5762,
107 		"Broadcom BCM5762 Gigabit Ethernet" },
108 
109 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57761,
110 		"Broadcom BCM57761 Gigabit Ethernet" },
111 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57762,
112 		"Broadcom BCM57762 Gigabit Ethernet" },
113 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57765,
114 		"Broadcom BCM57765 Gigabit Ethernet" },
115 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57766,
116 		"Broadcom BCM57766 Gigabit Ethernet" },
117 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57781,
118 		"Broadcom BCM57781 Gigabit Ethernet" },
119 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57782,
120 		"Broadcom BCM57782 Gigabit Ethernet" },
121 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57785,
122 		"Broadcom BCM57785 Gigabit Ethernet" },
123 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57786,
124 		"Broadcom BCM57786 Gigabit Ethernet" },
125 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57791,
126 		"Broadcom BCM57791 Fast Ethernet" },
127 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57795,
128 		"Broadcom BCM57795 Fast Ethernet" },
129 
130 	{ 0, 0, NULL }
131 };
132 
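/*
 * Per-ring TX host producer index mailbox registers.  bnx_writembx()
 * writes a ring's producer index through its mailbox to hand new
 * send BDs to the chip.
 */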
133 static const int bnx_tx_mailbox[BNX_TX_RING_MAX] = {
134 	BGE_MBX_TX_HOST_PROD0_LO,
135 	BGE_MBX_TX_HOST_PROD0_HI,
136 	BGE_MBX_TX_HOST_PROD1_LO,
137 	BGE_MBX_TX_HOST_PROD1_HI
138 };
139 
140 #define BNX_IS_JUMBO_CAPABLE(sc)	((sc)->bnx_flags & BNX_FLAG_JUMBO)
141 #define BNX_IS_5717_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_5717_PLUS)
142 #define BNX_IS_57765_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_57765_PLUS)
143 #define BNX_IS_57765_FAMILY(sc)	 \
144 	((sc)->bnx_flags & BNX_FLAG_57765_FAMILY)
145 
146 typedef int	(*bnx_eaddr_fcn_t)(struct bnx_softc *, uint8_t[]);
147 
148 static int	bnx_probe(device_t);
149 static int	bnx_attach(device_t);
150 static int	bnx_detach(device_t);
151 static void	bnx_shutdown(device_t);
152 static int	bnx_suspend(device_t);
153 static int	bnx_resume(device_t);
154 static int	bnx_miibus_readreg(device_t, int, int);
155 static int	bnx_miibus_writereg(device_t, int, int, int);
156 static void	bnx_miibus_statchg(device_t);
157 
158 #ifdef IFPOLL_ENABLE
159 static void	bnx_npoll(struct ifnet *, struct ifpoll_info *);
160 static void	bnx_npoll_rx(struct ifnet *, void *, int);
161 static void	bnx_npoll_tx(struct ifnet *, void *, int);
162 static void	bnx_npoll_status(struct ifnet *);
163 #endif
164 static void	bnx_intr_legacy(void *);
165 static void	bnx_msi(void *);
166 static void	bnx_intr(struct bnx_softc *);
167 static void	bnx_enable_intr(struct bnx_softc *);
168 static void	bnx_disable_intr(struct bnx_softc *);
169 static void	bnx_txeof(struct bnx_tx_ring *, uint16_t);
170 static void	bnx_rxeof(struct bnx_rx_ret_ring *, uint16_t, int);
171 static int	bnx_alloc_intr(struct bnx_softc *);
172 static int	bnx_setup_intr(struct bnx_softc *);
173 static void	bnx_free_intr(struct bnx_softc *);
174 static void	bnx_teardown_intr(struct bnx_softc *, int);
175 static void	bnx_check_intr(void *);
176 static void	bnx_rx_std_refill_ithread(void *);
177 static void	bnx_rx_std_refill(void *, void *);
178 
179 static void	bnx_start(struct ifnet *, struct ifaltq_subque *);
180 static int	bnx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
181 static void	bnx_init(void *);
182 static void	bnx_stop(struct bnx_softc *);
183 static void	bnx_watchdog(struct ifaltq_subque *);
184 static int	bnx_ifmedia_upd(struct ifnet *);
185 static void	bnx_ifmedia_sts(struct ifnet *, struct ifmediareq *);
186 static void	bnx_tick(void *);
187 static void	bnx_serialize(struct ifnet *, enum ifnet_serialize);
188 static void	bnx_deserialize(struct ifnet *, enum ifnet_serialize);
189 static int	bnx_tryserialize(struct ifnet *, enum ifnet_serialize);
190 #ifdef INVARIANTS
191 static void	bnx_serialize_assert(struct ifnet *, enum ifnet_serialize,
192 		    boolean_t);
193 #endif
194 
195 static int	bnx_alloc_jumbo_mem(struct bnx_softc *);
196 static void	bnx_free_jumbo_mem(struct bnx_softc *);
197 static struct bnx_jslot
198 		*bnx_jalloc(struct bnx_softc *);
199 static void	bnx_jfree(void *);
200 static void	bnx_jref(void *);
201 static int	bnx_newbuf_std(struct bnx_rx_ret_ring *, int, int);
202 static int	bnx_newbuf_jumbo(struct bnx_softc *, int, int);
203 static void	bnx_setup_rxdesc_std(struct bnx_rx_std_ring *, int);
204 static void	bnx_setup_rxdesc_jumbo(struct bnx_softc *, int);
205 static int	bnx_init_rx_ring_std(struct bnx_rx_std_ring *);
206 static void	bnx_free_rx_ring_std(struct bnx_rx_std_ring *);
207 static int	bnx_init_rx_ring_jumbo(struct bnx_softc *);
208 static void	bnx_free_rx_ring_jumbo(struct bnx_softc *);
209 static void	bnx_free_tx_ring(struct bnx_tx_ring *);
210 static int	bnx_init_tx_ring(struct bnx_tx_ring *);
211 static int	bnx_create_tx_ring(struct bnx_tx_ring *);
212 static void	bnx_destroy_tx_ring(struct bnx_tx_ring *);
213 static int	bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *);
214 static void	bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *);
215 static int	bnx_dma_alloc(device_t);
216 static void	bnx_dma_free(struct bnx_softc *);
217 static int	bnx_dma_block_alloc(struct bnx_softc *, bus_size_t,
218 		    bus_dma_tag_t *, bus_dmamap_t *, void **, bus_addr_t *);
219 static void	bnx_dma_block_free(bus_dma_tag_t, bus_dmamap_t, void *);
220 static struct mbuf *
221 		bnx_defrag_shortdma(struct mbuf *);
222 static int	bnx_encap(struct bnx_tx_ring *, struct mbuf **,
223 		    uint32_t *, int *);
224 static int	bnx_setup_tso(struct bnx_tx_ring *, struct mbuf **,
225 		    uint16_t *, uint16_t *);
226 static void	bnx_setup_serialize(struct bnx_softc *);
227 static void	bnx_set_tick_cpuid(struct bnx_softc *, boolean_t);
228 
229 static void	bnx_reset(struct bnx_softc *);
230 static int	bnx_chipinit(struct bnx_softc *);
231 static int	bnx_blockinit(struct bnx_softc *);
232 static void	bnx_stop_block(struct bnx_softc *, bus_size_t, uint32_t);
233 static void	bnx_enable_msi(struct bnx_softc *sc);
234 static void	bnx_setmulti(struct bnx_softc *);
235 static void	bnx_setpromisc(struct bnx_softc *);
236 static void	bnx_stats_update_regs(struct bnx_softc *);
237 static uint32_t	bnx_dma_swap_options(struct bnx_softc *);
238 
239 static uint32_t	bnx_readmem_ind(struct bnx_softc *, uint32_t);
240 static void	bnx_writemem_ind(struct bnx_softc *, uint32_t, uint32_t);
241 #ifdef notdef
242 static uint32_t	bnx_readreg_ind(struct bnx_softc *, uint32_t);
243 #endif
244 static void	bnx_writemem_direct(struct bnx_softc *, uint32_t, uint32_t);
245 static void	bnx_writembx(struct bnx_softc *, int, int);
246 static int	bnx_read_nvram(struct bnx_softc *, caddr_t, int, int);
247 static uint8_t	bnx_eeprom_getbyte(struct bnx_softc *, uint32_t, uint8_t *);
248 static int	bnx_read_eeprom(struct bnx_softc *, caddr_t, uint32_t, size_t);
249 
250 static void	bnx_tbi_link_upd(struct bnx_softc *, uint32_t);
251 static void	bnx_copper_link_upd(struct bnx_softc *, uint32_t);
252 static void	bnx_autopoll_link_upd(struct bnx_softc *, uint32_t);
253 static void	bnx_link_poll(struct bnx_softc *);
254 
255 static int	bnx_get_eaddr_mem(struct bnx_softc *, uint8_t[]);
256 static int	bnx_get_eaddr_nvram(struct bnx_softc *, uint8_t[]);
257 static int	bnx_get_eaddr_eeprom(struct bnx_softc *, uint8_t[]);
258 static int	bnx_get_eaddr(struct bnx_softc *, uint8_t[]);
259 
260 static void	bnx_coal_change(struct bnx_softc *);
261 static int	bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS);
262 static int	bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS);
263 static int	bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS);
264 static int	bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS);
265 static int	bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS);
266 static int	bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS);
267 static int	bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS);
268 static int	bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS);
269 static int	bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *,
270 		    int, int, uint32_t);
271 #ifdef IFPOLL_ENABLE
272 static int	bnx_sysctl_npoll_offset(SYSCTL_HANDLER_ARGS);
273 static int	bnx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
274 static int	bnx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
275 #endif
276 static int	bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS);
277 
278 static int	bnx_msi_enable = 1;
279 TUNABLE_INT("hw.bnx.msi.enable", &bnx_msi_enable);
280 
281 static device_method_t bnx_methods[] = {
282 	/* Device interface */
283 	DEVMETHOD(device_probe,		bnx_probe),
284 	DEVMETHOD(device_attach,	bnx_attach),
285 	DEVMETHOD(device_detach,	bnx_detach),
286 	DEVMETHOD(device_shutdown,	bnx_shutdown),
287 	DEVMETHOD(device_suspend,	bnx_suspend),
288 	DEVMETHOD(device_resume,	bnx_resume),
289 
290 	/* bus interface */
291 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
292 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
293 
294 	/* MII interface */
295 	DEVMETHOD(miibus_readreg,	bnx_miibus_readreg),
296 	DEVMETHOD(miibus_writereg,	bnx_miibus_writereg),
297 	DEVMETHOD(miibus_statchg,	bnx_miibus_statchg),
298 
299 	DEVMETHOD_END
300 };
301 
302 static DEFINE_CLASS_0(bnx, bnx_driver, bnx_methods, sizeof(struct bnx_softc));
303 static devclass_t bnx_devclass;
304 
305 DECLARE_DUMMY_MODULE(if_bnx);
306 DRIVER_MODULE(if_bnx, pci, bnx_driver, bnx_devclass, NULL, NULL);
307 DRIVER_MODULE(miibus, bnx, miibus_driver, miibus_devclass, NULL, NULL);
308 
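/*
 * Indirect access to NIC internal memory through the PCI memory
 * window: point BGE_PCI_MEMWIN_BASEADDR at the target offset, access
 * the data register, then restore the window to 0.
 */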
309 static uint32_t
310 bnx_readmem_ind(struct bnx_softc *sc, uint32_t off)
311 {
312 	device_t dev = sc->bnx_dev;
313 	uint32_t val;
314 
315 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
316 	val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4);
317 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
318 	return (val);
319 }
320 
321 static void
322 bnx_writemem_ind(struct bnx_softc *sc, uint32_t off, uint32_t val)
323 {
324 	device_t dev = sc->bnx_dev;
325 
326 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
327 	pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4);
328 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
329 }
330 
331 static void
332 bnx_writemem_direct(struct bnx_softc *sc, uint32_t off, uint32_t val)
333 {
334 	CSR_WRITE_4(sc, off, val);
335 }
336 
337 static void
338 bnx_writembx(struct bnx_softc *sc, int off, int val)
339 {
340 	CSR_WRITE_4(sc, off, val);
341 }
342 
343 /*
344  * Read a sequence of bytes from NVRAM.
345  */
346 static int
347 bnx_read_nvram(struct bnx_softc *sc, caddr_t dest, int off, int cnt)
348 {
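	/* NVRAM access is not supported here; always report failure. */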
349 	return (1);
350 }
351 
352 /*
353  * Read a byte of data stored in the EEPROM at address 'addr.' The
354  * BCM570x supports both the traditional bitbang interface and an
355  * auto access interface for reading the EEPROM. We use the auto
356  * access method.
357  */
358 static uint8_t
359 bnx_eeprom_getbyte(struct bnx_softc *sc, uint32_t addr, uint8_t *dest)
360 {
361 	int i;
362 	uint32_t byte = 0;
363 
364 	/*
365 	 * Enable use of auto EEPROM access so we can avoid
366 	 * having to use the bitbang method.
367 	 */
368 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM);
369 
370 	/* Reset the EEPROM, load the clock period. */
371 	CSR_WRITE_4(sc, BGE_EE_ADDR,
372 	    BGE_EEADDR_RESET|BGE_EEHALFCLK(BGE_HALFCLK_384SCL));
373 	DELAY(20);
374 
375 	/* Issue the read EEPROM command. */
376 	CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr);
377 
378 	/* Wait for completion */
379 	for (i = 0; i < BNX_TIMEOUT * 10; i++) {
380 		DELAY(10);
381 		if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE)
382 			break;
383 	}
384 
385 	if (i == BNX_TIMEOUT * 10) {
386 		if_printf(&sc->arpcom.ac_if, "eeprom read timed out\n");
387 		return(1);
388 	}
389 
390 	/* Get result. */
391 	byte = CSR_READ_4(sc, BGE_EE_DATA);
392 
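	/* EE_DATA holds a 32-bit word; extract the byte we addressed. */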
393 	*dest = (byte >> ((addr % 4) * 8)) & 0xFF;
394 
395 	return(0);
396 }
397 
398 /*
399  * Read a sequence of bytes from the EEPROM.
400  */
401 static int
402 bnx_read_eeprom(struct bnx_softc *sc, caddr_t dest, uint32_t off, size_t len)
403 {
404 	size_t i;
405 	int err;
406 	uint8_t byte;
407 
408 	for (byte = 0, err = 0, i = 0; i < len; i++) {
409 		err = bnx_eeprom_getbyte(sc, off + i, &byte);
410 		if (err)
411 			break;
412 		*(dest + i) = byte;
413 	}
414 
415 	return(err ? 1 : 0);
416 }
417 
418 static int
419 bnx_miibus_readreg(device_t dev, int phy, int reg)
420 {
421 	struct bnx_softc *sc = device_get_softc(dev);
422 	uint32_t val;
423 	int i;
424 
425 	KASSERT(phy == sc->bnx_phyno,
426 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
427 
428 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
429 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
430 		CSR_WRITE_4(sc, BGE_MI_MODE,
431 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
432 		DELAY(80);
433 	}
434 
435 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY |
436 	    BGE_MIPHY(phy) | BGE_MIREG(reg));
437 
438 	/* Poll for the PHY register access to complete. */
439 	for (i = 0; i < BNX_TIMEOUT; i++) {
440 		DELAY(10);
441 		val = CSR_READ_4(sc, BGE_MI_COMM);
442 		if ((val & BGE_MICOMM_BUSY) == 0) {
443 			DELAY(5);
444 			val = CSR_READ_4(sc, BGE_MI_COMM);
445 			break;
446 		}
447 	}
448 	if (i == BNX_TIMEOUT) {
449 		if_printf(&sc->arpcom.ac_if, "PHY read timed out "
450 		    "(phy %d, reg %d, val 0x%08x)\n", phy, reg, val);
451 		val = 0;
452 	}
453 
454 	/* Restore the autopoll bit if necessary. */
455 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
456 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
457 		DELAY(80);
458 	}
459 
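	/* The PHY flagged the read as failed; return 0 instead of garbage. */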
460 	if (val & BGE_MICOMM_READFAIL)
461 		return 0;
462 
463 	return (val & 0xFFFF);
464 }
465 
466 static int
467 bnx_miibus_writereg(device_t dev, int phy, int reg, int val)
468 {
469 	struct bnx_softc *sc = device_get_softc(dev);
470 	int i;
471 
472 	KASSERT(phy == sc->bnx_phyno,
473 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
474 
475 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
476 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
477 		CSR_WRITE_4(sc, BGE_MI_MODE,
478 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
479 		DELAY(80);
480 	}
481 
482 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY |
483 	    BGE_MIPHY(phy) | BGE_MIREG(reg) | val);
484 
485 	for (i = 0; i < BNX_TIMEOUT; i++) {
486 		DELAY(10);
487 		if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) {
488 			DELAY(5);
489 			CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */
490 			break;
491 		}
492 	}
493 	if (i == BNX_TIMEOUT) {
494 		if_printf(&sc->arpcom.ac_if, "PHY write timed out "
495 		    "(phy %d, reg %d, val %d)\n", phy, reg, val);
496 	}
497 
498 	/* Restore the autopoll bit if necessary. */
499 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
500 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
501 		DELAY(80);
502 	}
503 
504 	return 0;
505 }
506 
507 static void
508 bnx_miibus_statchg(device_t dev)
509 {
510 	struct bnx_softc *sc;
511 	struct mii_data *mii;
512 
513 	sc = device_get_softc(dev);
514 	mii = device_get_softc(sc->bnx_miibus);
515 
516 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
517 	    (IFM_ACTIVE | IFM_AVALID)) {
518 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
519 		case IFM_10_T:
520 		case IFM_100_TX:
521 			sc->bnx_link = 1;
522 			break;
523 		case IFM_1000_T:
524 		case IFM_1000_SX:
525 		case IFM_2500_SX:
526 			sc->bnx_link = 1;
527 			break;
528 		default:
529 			sc->bnx_link = 0;
530 			break;
531 		}
532 	} else {
533 		sc->bnx_link = 0;
534 	}
535 	if (sc->bnx_link == 0)
536 		return;
537 
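	/*
	 * Reprogram the MAC port mode to match the negotiated media:
	 * GMII for 1000baseT/SX, MII otherwise.
	 */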
538 	BNX_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_PORTMODE);
539 	if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
540 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX) {
541 		BNX_SETBIT(sc, BGE_MAC_MODE, BGE_PORTMODE_GMII);
542 	} else {
543 		BNX_SETBIT(sc, BGE_MAC_MODE, BGE_PORTMODE_MII);
544 	}
545 
546 	if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) {
547 		BNX_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX);
548 	} else {
549 		BNX_SETBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX);
550 	}
551 }
552 
553 /*
554  * Memory management for jumbo frames.
555  */
556 static int
557 bnx_alloc_jumbo_mem(struct bnx_softc *sc)
558 {
559 	struct ifnet *ifp = &sc->arpcom.ac_if;
560 	struct bnx_jslot *entry;
561 	uint8_t *ptr;
562 	bus_addr_t paddr;
563 	int i, error;
564 
565 	/*
566 	 * Create tag for jumbo mbufs.
567 	 * This is really a bit of a kludge. We allocate a special
568 	 * jumbo buffer pool which (thanks to the way our DMA
569 	 * memory allocation works) will consist of contiguous
570 	 * pages. This means that even though a jumbo buffer might
571 	 * be larger than a page size, we don't really need to
572 	 * map it into more than one DMA segment. However, the
573 	 * default mbuf tag will result in multi-segment mappings,
574 	 * so we have to create a special jumbo mbuf tag that
575 	 * lets us get away with mapping the jumbo buffers as
576 	 * a single segment. I think eventually the driver should
577 	 * be changed so that it uses ordinary mbufs and cluster
578 	 * buffers, i.e. jumbo frames can span multiple DMA
579 	 * descriptors. But that's a project for another day.
580 	 */
581 
582 	/*
583 	 * Create DMA stuffs for jumbo RX ring.
584 	 */
585 	error = bnx_dma_block_alloc(sc, BGE_JUMBO_RX_RING_SZ,
586 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
587 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_map,
588 				    (void *)&sc->bnx_ldata.bnx_rx_jumbo_ring,
589 				    &sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
590 	if (error) {
591 		if_printf(ifp, "could not create jumbo RX ring\n");
592 		return error;
593 	}
594 
595 	/*
596 	 * Create DMA stuffs for jumbo buffer block.
597 	 */
598 	error = bnx_dma_block_alloc(sc, BNX_JMEM,
599 				    &sc->bnx_cdata.bnx_jumbo_tag,
600 				    &sc->bnx_cdata.bnx_jumbo_map,
601 				    (void **)&sc->bnx_ldata.bnx_jumbo_buf,
602 				    &paddr);
603 	if (error) {
604 		if_printf(ifp, "could not create jumbo buffer\n");
605 		return error;
606 	}
607 
608 	SLIST_INIT(&sc->bnx_jfree_listhead);
609 
610 	/*
611 	 * Now divide it up into 9K pieces and save the addresses
612 	 * in an array. Note that we play an evil trick here by using
613 	 * the first few bytes in the buffer to hold the address
614 	 * of the softc structure for this interface. This is because
615 	 * bnx_jfree() needs it, but it is called by the mbuf management
616 	 * code which will not pass it to us explicitly.
617 	 */
618 	for (i = 0, ptr = sc->bnx_ldata.bnx_jumbo_buf; i < BNX_JSLOTS; i++) {
619 		entry = &sc->bnx_cdata.bnx_jslots[i];
620 		entry->bnx_sc = sc;
621 		entry->bnx_buf = ptr;
622 		entry->bnx_paddr = paddr;
623 		entry->bnx_inuse = 0;
624 		entry->bnx_slot = i;
625 		SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead, entry, jslot_link);
626 
627 		ptr += BNX_JLEN;
628 		paddr += BNX_JLEN;
629 	}
630 	return 0;
631 }
632 
633 static void
634 bnx_free_jumbo_mem(struct bnx_softc *sc)
635 {
636 	/* Destroy jumbo RX ring. */
637 	bnx_dma_block_free(sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
638 			   sc->bnx_cdata.bnx_rx_jumbo_ring_map,
639 			   sc->bnx_ldata.bnx_rx_jumbo_ring);
640 
641 	/* Destroy jumbo buffer block. */
642 	bnx_dma_block_free(sc->bnx_cdata.bnx_jumbo_tag,
643 			   sc->bnx_cdata.bnx_jumbo_map,
644 			   sc->bnx_ldata.bnx_jumbo_buf);
645 }
646 
647 /*
648  * Allocate a jumbo buffer.
649  */
650 static struct bnx_jslot *
651 bnx_jalloc(struct bnx_softc *sc)
652 {
653 	struct bnx_jslot *entry;
654 
655 	lwkt_serialize_enter(&sc->bnx_jslot_serializer);
656 	entry = SLIST_FIRST(&sc->bnx_jfree_listhead);
657 	if (entry) {
658 		SLIST_REMOVE_HEAD(&sc->bnx_jfree_listhead, jslot_link);
659 		entry->bnx_inuse = 1;
660 	} else {
661 		if_printf(&sc->arpcom.ac_if, "no free jumbo buffers\n");
662 	}
663 	lwkt_serialize_exit(&sc->bnx_jslot_serializer);
664 	return(entry);
665 }
666 
667 /*
668  * Adjust usage count on a jumbo buffer.
669  */
670 static void
671 bnx_jref(void *arg)
672 {
673 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
674 	struct bnx_softc *sc = entry->bnx_sc;
675 
676 	if (sc == NULL)
677 		panic("bnx_jref: can't find softc pointer!");
678 
679 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
680 		panic("bnx_jref: asked to reference buffer "
681 		    "that we don't manage!");
682 	} else if (entry->bnx_inuse == 0) {
683 		panic("bnx_jref: buffer already free!");
684 	} else {
685 		atomic_add_int(&entry->bnx_inuse, 1);
686 	}
687 }
688 
689 /*
690  * Release a jumbo buffer.
691  */
692 static void
693 bnx_jfree(void *arg)
694 {
695 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
696 	struct bnx_softc *sc = entry->bnx_sc;
697 
698 	if (sc == NULL)
699 		panic("bnx_jfree: can't find softc pointer!");
700 
701 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
702 		panic("bnx_jfree: asked to free buffer that we don't manage!");
703 	} else if (entry->bnx_inuse == 0) {
704 		panic("bnx_jfree: buffer already free!");
705 	} else {
706 		/*
707 		 * Possible MP race to 0, use the serializer.  The atomic insn
708 		 * is still needed for races against bnx_jref().
709 		 */
710 		lwkt_serialize_enter(&sc->bnx_jslot_serializer);
711 		atomic_subtract_int(&entry->bnx_inuse, 1);
712 		if (entry->bnx_inuse == 0) {
713 			SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead,
714 					  entry, jslot_link);
715 		}
716 		lwkt_serialize_exit(&sc->bnx_jslot_serializer);
717 	}
718 }
719 
720 
721 /*
722  * Initialize a standard receive ring descriptor.
723  */
724 static int
725 bnx_newbuf_std(struct bnx_rx_ret_ring *ret, int i, int init)
726 {
727 	struct mbuf *m_new = NULL;
728 	bus_dma_segment_t seg;
729 	bus_dmamap_t map;
730 	int error, nsegs;
731 	struct bnx_rx_buf *rb;
732 
733 	rb = &ret->bnx_std->bnx_rx_std_buf[i];
734 	KASSERT(!rb->bnx_rx_refilled, ("RX buf %dth has been refilled", i));
735 
736 	m_new = m_getcl(init ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
737 	if (m_new == NULL) {
738 		error = ENOBUFS;
739 		goto back;
740 	}
741 	m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
742 	m_adj(m_new, ETHER_ALIGN);
743 
744 	error = bus_dmamap_load_mbuf_segment(ret->bnx_rx_mtag,
745 	    ret->bnx_rx_tmpmap, m_new, &seg, 1, &nsegs, BUS_DMA_NOWAIT);
746 	if (error) {
747 		m_freem(m_new);
748 		goto back;
749 	}
750 
751 	if (!init) {
752 		bus_dmamap_sync(ret->bnx_rx_mtag, rb->bnx_rx_dmamap,
753 		    BUS_DMASYNC_POSTREAD);
754 		bus_dmamap_unload(ret->bnx_rx_mtag, rb->bnx_rx_dmamap);
755 	}
756 
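	/*
	 * The new mbuf was loaded with the spare DMA map; swap it with
	 * this slot's map so the slot owns the loaded map and the old
	 * map becomes the spare for the next refill.
	 */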
757 	map = ret->bnx_rx_tmpmap;
758 	ret->bnx_rx_tmpmap = rb->bnx_rx_dmamap;
759 
760 	rb->bnx_rx_dmamap = map;
761 	rb->bnx_rx_mbuf = m_new;
762 	rb->bnx_rx_paddr = seg.ds_addr;
763 back:
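	/*
	 * Make the updated buffer fields visible to other CPUs before
	 * marking the slot as refilled.
	 */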
764 	cpu_sfence();
765 	rb->bnx_rx_refilled = 1;
766 	return error;
767 }
768 
769 static void
770 bnx_setup_rxdesc_std(struct bnx_rx_std_ring *std, int i)
771 {
772 	struct bnx_rx_buf *rb;
773 	struct bge_rx_bd *r;
774 
775 	rb = &std->bnx_rx_std_buf[i];
776 	KASSERT(rb->bnx_rx_refilled, ("RX buf %dth is not refilled", i));
777 	rb->bnx_rx_refilled = 0;
778 
779 	r = &std->bnx_rx_std_ring[i];
780 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(rb->bnx_rx_paddr);
781 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(rb->bnx_rx_paddr);
782 	r->bge_len = rb->bnx_rx_mbuf->m_len;
783 	r->bge_idx = i;
784 	r->bge_flags = BGE_RXBDFLAG_END;
785 }
786 
787 /*
788  * Initialize a jumbo receive ring descriptor. This allocates
789  * a jumbo buffer from the pool managed internally by the driver.
790  */
791 static int
792 bnx_newbuf_jumbo(struct bnx_softc *sc, int i, int init)
793 {
794 	struct mbuf *m_new = NULL;
795 	struct bnx_jslot *buf;
796 	bus_addr_t paddr;
797 
798 	/* Allocate the mbuf. */
799 	MGETHDR(m_new, init ? MB_WAIT : MB_DONTWAIT, MT_DATA);
800 	if (m_new == NULL)
801 		return ENOBUFS;
802 
803 	/* Allocate the jumbo buffer */
804 	buf = bnx_jalloc(sc);
805 	if (buf == NULL) {
806 		m_freem(m_new);
807 		return ENOBUFS;
808 	}
809 
810 	/* Attach the buffer to the mbuf. */
811 	m_new->m_ext.ext_arg = buf;
812 	m_new->m_ext.ext_buf = buf->bnx_buf;
813 	m_new->m_ext.ext_free = bnx_jfree;
814 	m_new->m_ext.ext_ref = bnx_jref;
815 	m_new->m_ext.ext_size = BNX_JUMBO_FRAMELEN;
816 
817 	m_new->m_flags |= M_EXT;
818 
819 	m_new->m_data = m_new->m_ext.ext_buf;
820 	m_new->m_len = m_new->m_pkthdr.len = m_new->m_ext.ext_size;
821 
822 	paddr = buf->bnx_paddr;
823 	m_adj(m_new, ETHER_ALIGN);
824 	paddr += ETHER_ALIGN;
825 
826 	/* Save necessary information */
827 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_mbuf = m_new;
828 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_paddr = paddr;
829 
830 	/* Set up the descriptor. */
831 	bnx_setup_rxdesc_jumbo(sc, i);
832 	return 0;
833 }
834 
835 static void
836 bnx_setup_rxdesc_jumbo(struct bnx_softc *sc, int i)
837 {
838 	struct bge_rx_bd *r;
839 	struct bnx_rx_buf *rc;
840 
841 	r = &sc->bnx_ldata.bnx_rx_jumbo_ring[i];
842 	rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
843 
844 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(rc->bnx_rx_paddr);
845 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(rc->bnx_rx_paddr);
846 	r->bge_len = rc->bnx_rx_mbuf->m_len;
847 	r->bge_idx = i;
848 	r->bge_flags = BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING;
849 }
850 
851 static int
852 bnx_init_rx_ring_std(struct bnx_rx_std_ring *std)
853 {
854 	int i, error;
855 
856 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
857 		/* Use the first RX return ring's tmp RX mbuf DMA map */
858 		error = bnx_newbuf_std(&std->bnx_sc->bnx_rx_ret_ring[0], i, 1);
859 		if (error)
860 			return error;
861 		bnx_setup_rxdesc_std(std, i);
862 	}
863 
864 	std->bnx_rx_std_refill = 0;
865 	std->bnx_rx_std_running = 0;
866 	cpu_sfence();
867 	lwkt_serialize_handler_enable(&std->bnx_rx_std_serialize);
868 
869 	std->bnx_rx_std = BGE_STD_RX_RING_CNT - 1;
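	/*
	 * Hand the fully populated ring to the chip by advancing the
	 * standard ring producer index to the last descriptor.
	 */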
870 	bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO, std->bnx_rx_std);
871 
872 	return(0);
873 }
874 
875 static void
876 bnx_free_rx_ring_std(struct bnx_rx_std_ring *std)
877 {
878 	int i;
879 
880 	lwkt_serialize_handler_disable(&std->bnx_rx_std_serialize);
881 
882 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
883 		struct bnx_rx_buf *rb = &std->bnx_rx_std_buf[i];
884 
885 		rb->bnx_rx_refilled = 0;
886 		if (rb->bnx_rx_mbuf != NULL) {
887 			bus_dmamap_unload(std->bnx_rx_mtag, rb->bnx_rx_dmamap);
888 			m_freem(rb->bnx_rx_mbuf);
889 			rb->bnx_rx_mbuf = NULL;
890 		}
891 		bzero(&std->bnx_rx_std_ring[i], sizeof(struct bge_rx_bd));
892 	}
893 }
894 
895 static int
896 bnx_init_rx_ring_jumbo(struct bnx_softc *sc)
897 {
898 	struct bge_rcb *rcb;
899 	int i, error;
900 
901 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
902 		error = bnx_newbuf_jumbo(sc, i, 1);
903 		if (error)
904 			return error;
905 	}
906 
907 	sc->bnx_jumbo = BGE_JUMBO_RX_RING_CNT - 1;
908 
909 	rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
910 	rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, 0);
911 	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
912 
913 	bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, sc->bnx_jumbo);
914 
915 	return(0);
916 }
917 
918 static void
919 bnx_free_rx_ring_jumbo(struct bnx_softc *sc)
920 {
921 	int i;
922 
923 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
924 		struct bnx_rx_buf *rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
925 
926 		if (rc->bnx_rx_mbuf != NULL) {
927 			m_freem(rc->bnx_rx_mbuf);
928 			rc->bnx_rx_mbuf = NULL;
929 		}
930 		bzero(&sc->bnx_ldata.bnx_rx_jumbo_ring[i],
931 		    sizeof(struct bge_rx_bd));
932 	}
933 }
934 
935 static void
936 bnx_free_tx_ring(struct bnx_tx_ring *txr)
937 {
938 	int i;
939 
940 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
941 		struct bnx_tx_buf *buf = &txr->bnx_tx_buf[i];
942 
943 		if (buf->bnx_tx_mbuf != NULL) {
944 			bus_dmamap_unload(txr->bnx_tx_mtag,
945 			    buf->bnx_tx_dmamap);
946 			m_freem(buf->bnx_tx_mbuf);
947 			buf->bnx_tx_mbuf = NULL;
948 		}
949 		bzero(&txr->bnx_tx_ring[i], sizeof(struct bge_tx_bd));
950 	}
951 	txr->bnx_tx_saved_considx = BNX_TXCONS_UNSET;
952 }
953 
954 static int
955 bnx_init_tx_ring(struct bnx_tx_ring *txr)
956 {
957 	txr->bnx_tx_cnt = 0;
958 	txr->bnx_tx_saved_considx = 0;
959 	txr->bnx_tx_prodidx = 0;
960 
961 	/* Initialize transmit producer index for host-memory send ring. */
962 	bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, txr->bnx_tx_prodidx);
963 
964 	return(0);
965 }
966 
967 static void
968 bnx_setmulti(struct bnx_softc *sc)
969 {
970 	struct ifnet *ifp;
971 	struct ifmultiaddr *ifma;
972 	uint32_t hashes[4] = { 0, 0, 0, 0 };
973 	int h, i;
974 
975 	ifp = &sc->arpcom.ac_if;
976 
977 	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
978 		for (i = 0; i < 4; i++)
979 			CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF);
980 		return;
981 	}
982 
983 	/* First, zot all the existing filters. */
984 	for (i = 0; i < 4; i++)
985 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0);
986 
987 	/* Now program new ones. */
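	/*
	 * Each address is hashed with CRC-32; the low 7 bits of the hash
	 * index a 128-bit filter spread across the four MAR registers.
	 */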
988 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
989 		if (ifma->ifma_addr->sa_family != AF_LINK)
990 			continue;
991 		h = ether_crc32_le(
992 		    LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
993 		    ETHER_ADDR_LEN) & 0x7f;
994 		hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F);
995 	}
996 
997 	for (i = 0; i < 4; i++)
998 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]);
999 }
1000 
1001 /*
1002  * Do endian, PCI and DMA initialization. Also check the on-board ROM
1003  * self-test results.
1004  */
1005 static int
1006 bnx_chipinit(struct bnx_softc *sc)
1007 {
1008 	uint32_t dma_rw_ctl, mode_ctl;
1009 	int i;
1010 
1011 	/* Set endian type before we access any non-PCI registers. */
1012 	pci_write_config(sc->bnx_dev, BGE_PCI_MISC_CTL,
1013 	    BGE_INIT | BGE_PCIMISCCTL_TAGGED_STATUS, 4);
1014 
1015 	/* Clear the MAC control register */
1016 	CSR_WRITE_4(sc, BGE_MAC_MODE, 0);
1017 
1018 	/*
1019 	 * Clear the MAC statistics block in the NIC's
1020 	 * internal memory.
1021 	 */
1022 	for (i = BGE_STATS_BLOCK;
1023 	    i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t))
1024 		BNX_MEMWIN_WRITE(sc, i, 0);
1025 
1026 	for (i = BGE_STATUS_BLOCK;
1027 	    i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t))
1028 		BNX_MEMWIN_WRITE(sc, i, 0);
1029 
1030 	if (BNX_IS_57765_FAMILY(sc)) {
1031 		uint32_t val;
1032 
1033 		if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0) {
1034 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1035 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1036 
1037 			/* Access the lower 1K of PL PCI-E block registers. */
1038 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1039 			    val | BGE_MODECTL_PCIE_PL_SEL);
1040 
1041 			val = CSR_READ_4(sc, BGE_PCIE_PL_LO_PHYCTL5);
1042 			val |= BGE_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ;
1043 			CSR_WRITE_4(sc, BGE_PCIE_PL_LO_PHYCTL5, val);
1044 
1045 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1046 		}
1047 		if (sc->bnx_chiprev != BGE_CHIPREV_57765_AX) {
1048 			/* Fix transmit hangs */
1049 			val = CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL);
1050 			val |= BGE_CPMU_PADRNG_CTL_RDIV2;
1051 			CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, val);
1052 
1053 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1054 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1055 
1056 			/* Access the lower 1K of DL PCI-E block registers. */
1057 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1058 			    val | BGE_MODECTL_PCIE_DL_SEL);
1059 
1060 			val = CSR_READ_4(sc, BGE_PCIE_DL_LO_FTSMAX);
1061 			val &= ~BGE_PCIE_DL_LO_FTSMAX_MASK;
1062 			val |= BGE_PCIE_DL_LO_FTSMAX_VAL;
1063 			CSR_WRITE_4(sc, BGE_PCIE_DL_LO_FTSMAX, val);
1064 
1065 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1066 		}
1067 
1068 		val = CSR_READ_4(sc, BGE_CPMU_LSPD_10MB_CLK);
1069 		val &= ~BGE_CPMU_LSPD_10MB_MACCLK_MASK;
1070 		val |= BGE_CPMU_LSPD_10MB_MACCLK_6_25;
1071 		CSR_WRITE_4(sc, BGE_CPMU_LSPD_10MB_CLK, val);
1072 	}
1073 
1074 	/*
1075 	 * Set up the PCI DMA control register.
1076 	 */
1077 	dma_rw_ctl = pci_read_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, 4);
1078 	/*
1079 	 * Disable 32bytes cache alignment for DMA write to host memory
1080 	 *
1081 	 * NOTE:
1082 	 * 64bytes cache alignment for DMA write to host memory is still
1083 	 * enabled.
1084 	 */
1085 	dma_rw_ctl |= BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT;
1086 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
1087 		dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK;
1088 	/*
1089 	 * Enable HW workaround for controllers that misinterpret
1090 	 * a status tag update and leave interrupts permanently
1091 	 * disabled.
1092 	 */
1093 	if (sc->bnx_asicrev != BGE_ASICREV_BCM5717 &&
1094 	    sc->bnx_asicrev != BGE_ASICREV_BCM5762 &&
1095 	    !BNX_IS_57765_FAMILY(sc))
1096 		dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA;
1097 	if (bootverbose) {
1098 		if_printf(&sc->arpcom.ac_if, "DMA read/write %#x\n",
1099 		    dma_rw_ctl);
1100 	}
1101 	pci_write_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4);
1102 
1103 	/*
1104 	 * Set up general mode register.
1105 	 */
1106 	mode_ctl = bnx_dma_swap_options(sc) | BGE_MODECTL_MAC_ATTN_INTR |
1107 	    BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM;
1108 	CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1109 
1110 	/*
1111 	 * Disable memory write invalidate.  Apparently it is not supported
1112 	 * properly by these devices.  Also ensure that INTx isn't disabled,
1113 	 * as these chips need it even when using MSI.
1114 	 */
1115 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_CMD,
1116 	    (PCIM_CMD_MWRICEN | PCIM_CMD_INTxDIS), 4);
1117 
1118 	/* Set the timer prescaler (always 66MHz) */
1119 	CSR_WRITE_4(sc, BGE_MISC_CFG, 65 << 1/*BGE_32BITTIME_66MHZ*/);
1120 
1121 	return(0);
1122 }
1123 
1124 static int
1125 bnx_blockinit(struct bnx_softc *sc)
1126 {
1127 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
1128 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
1129 	struct bnx_intr_data *intr = &sc->bnx_intr_data[0];
1130 	struct bge_rcb *rcb;
1131 	bus_size_t vrcb;
1132 	bge_hostaddr taddr;
1133 	uint32_t val;
1134 	int i, limit;
1135 
1136 	/*
1137 	 * Initialize the memory window pointer register so that
1138 	 * we can access the first 32K of internal NIC RAM. This will
1139 	 * allow us to set up the TX send ring RCBs and the RX return
1140 	 * ring RCBs, plus other things which live in NIC memory.
1141 	 */
1142 	CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0);
1143 
1144 	/* Configure mbuf pool watermarks */
1145 	if (BNX_IS_57765_PLUS(sc)) {
1146 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1147 		if (sc->arpcom.ac_if.if_mtu > ETHERMTU) {
1148 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e);
1149 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea);
1150 		} else {
1151 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a);
1152 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0);
1153 		}
1154 	} else {
1155 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1156 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10);
1157 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60);
1158 	}
1159 
1160 	/* Configure DMA resource watermarks */
1161 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5);
1162 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10);
1163 
1164 	/* Enable buffer manager */
1165 	val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN;
1166 	/*
1167 	 * Change the arbitration algorithm of TXMBUF read request to
1168 	 * round-robin instead of priority based for BCM5719.  When
1169 	 * TXFIFO is almost empty, RDMA will hold its request until
1170 	 * TXFIFO is not almost empty.
1171 	 */
1172 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1173 		val |= BGE_BMANMODE_NO_TX_UNDERRUN;
1174 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1175 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0 ||
1176 	    sc->bnx_chipid == BGE_CHIPID_BCM5720_A0)
1177 		val |= BGE_BMANMODE_LOMBUF_ATTN;
1178 	CSR_WRITE_4(sc, BGE_BMAN_MODE, val);
1179 
1180 	/* Poll for buffer manager start indication */
1181 	for (i = 0; i < BNX_TIMEOUT; i++) {
1182 		if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
1183 			break;
1184 		DELAY(10);
1185 	}
1186 
1187 	if (i == BNX_TIMEOUT) {
1188 		if_printf(&sc->arpcom.ac_if,
1189 			  "buffer manager failed to start\n");
1190 		return(ENXIO);
1191 	}
1192 
1193 	/* Enable flow-through queues */
1194 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
1195 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
1196 
1197 	/* Wait until queue initialization is complete */
1198 	for (i = 0; i < BNX_TIMEOUT; i++) {
1199 		if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0)
1200 			break;
1201 		DELAY(10);
1202 	}
1203 
1204 	if (i == BNX_TIMEOUT) {
1205 		if_printf(&sc->arpcom.ac_if,
1206 			  "flow-through queue init failed\n");
1207 		return(ENXIO);
1208 	}
1209 
1210 	/*
1211 	 * Summary of rings supported by the controller:
1212 	 *
1213 	 * Standard Receive Producer Ring
1214 	 * - This ring is used to feed receive buffers for "standard"
1215 	 *   sized frames (typically 1536 bytes) to the controller.
1216 	 *
1217 	 * Jumbo Receive Producer Ring
1218 	 * - This ring is used to feed receive buffers for jumbo sized
1219 	 *   frames (i.e. anything bigger than the "standard" frames)
1220 	 *   to the controller.
1221 	 *
1222 	 * Mini Receive Producer Ring
1223 	 * - This ring is used to feed receive buffers for "mini"
1224 	 *   sized frames to the controller.
1225 	 * - This feature required external memory for the controller
1226 	 *   but was never used in a production system.  Should always
1227 	 *   be disabled.
1228 	 *
1229 	 * Receive Return Ring
1230 	 * - After the controller has placed an incoming frame into a
1231 	 *   receive buffer that buffer is moved into a receive return
1232 	 *   ring.  The driver is then responsible for passing the
1233 	 *   buffer up to the stack.  Many versions of the controller
1234 	 *   support multiple RR rings.
1235 	 *
1236 	 * Send Ring
1237 	 * - This ring is used for outgoing frames.  Many versions of
1238 	 *   the controller support multiple send rings.
1239 	 */
1240 
1241 	/* Initialize the standard receive producer ring control block. */
1242 	rcb = &sc->bnx_ldata.bnx_info.bnx_std_rx_rcb;
1243 	rcb->bge_hostaddr.bge_addr_lo =
1244 	    BGE_ADDR_LO(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1245 	rcb->bge_hostaddr.bge_addr_hi =
1246 	    BGE_ADDR_HI(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1247 	if (BNX_IS_57765_PLUS(sc)) {
1248 		/*
1249 		 * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32)
1250 		 * Bits 15-2 : Maximum RX frame size
1251 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring ENabled
1252 		 * Bit 0     : Reserved
1253 		 */
1254 		rcb->bge_maxlen_flags =
1255 		    BGE_RCB_MAXLEN_FLAGS(512, BNX_MAX_FRAMELEN << 2);
1256 	} else {
1257 		/*
1258 		 * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32)
1259 		 * Bits 15-2 : Reserved (should be 0)
1260 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1261 		 * Bit 0     : Reserved
1262 		 */
1263 		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0);
1264 	}
1265 	if (BNX_IS_5717_PLUS(sc))
1266 		rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717;
1267 	else
1268 		rcb->bge_nicaddr = BGE_STD_RX_RINGS;
1269 	/* Write the standard receive producer ring control block. */
1270 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi);
1271 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo);
1272 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1273 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr);
1274 	/* Reset the standard receive producer ring producer index. */
1275 	bnx_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0);
1276 
1277 	/*
1278 	 * Initialize the jumbo RX producer ring control
1279 	 * block.  We set the 'ring disabled' bit in the
1280 	 * flags field until we're actually ready to start
1281 	 * using this ring (i.e. once we set the MTU
1282 	 * high enough to require it).
1283 	 */
1284 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1285 		rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1286 		/* Get the jumbo receive producer ring RCB parameters. */
1287 		rcb->bge_hostaddr.bge_addr_lo =
1288 		    BGE_ADDR_LO(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1289 		rcb->bge_hostaddr.bge_addr_hi =
1290 		    BGE_ADDR_HI(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1291 		rcb->bge_maxlen_flags =
1292 		    BGE_RCB_MAXLEN_FLAGS(BNX_MAX_FRAMELEN,
1293 		    BGE_RCB_FLAG_RING_DISABLED);
1294 		if (BNX_IS_5717_PLUS(sc))
1295 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717;
1296 		else
1297 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS;
1298 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI,
1299 		    rcb->bge_hostaddr.bge_addr_hi);
1300 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO,
1301 		    rcb->bge_hostaddr.bge_addr_lo);
1302 		/* Program the jumbo receive producer ring RCB parameters. */
1303 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS,
1304 		    rcb->bge_maxlen_flags);
1305 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr);
1306 		/* Reset the jumbo receive producer ring producer index. */
1307 		bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
1308 	}
1309 
1310 	/*
1311 	 * The BD ring replenish thresholds control how often the
1312 	 * hardware fetches new BD's from the producer rings in host
1313 	 * memory.  Setting the value too low on a busy system can
1314 	 * starve the hardware and reduce the throughput.
1315 	 *
1316 	 * Set the BD ring replenish thresholds. The recommended
1317 	 * values are 1/8th the number of descriptors allocated to
1318 	 * each ring.
1319 	 */
1320 	val = 8;
1321 	CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val);
1322 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1323 		CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH,
1324 		    BGE_JUMBO_RX_RING_CNT/8);
1325 	}
1326 	if (BNX_IS_57765_PLUS(sc)) {
1327 		CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32);
1328 		CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16);
1329 	}
1330 
1331 	/*
1332 	 * Disable all send rings by setting the 'ring disabled' bit
1333 	 * in the flags field of all the TX send ring control blocks,
1334 	 * located in NIC memory.
1335 	 */
1336 	if (BNX_IS_5717_PLUS(sc))
1337 		limit = 4;
1338 	else if (BNX_IS_57765_FAMILY(sc) ||
1339 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1340 		limit = 2;
1341 	else
1342 		limit = 1;
1343 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1344 	for (i = 0; i < limit; i++) {
1345 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1346 		    BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED));
1347 		RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0);
1348 		vrcb += sizeof(struct bge_rcb);
1349 	}
1350 
1351 	/* Configure send ring RCB 0 (we use only the first ring) */
1352 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1353 	BGE_HOSTADDR(taddr, txr->bnx_tx_ring_paddr);
1354 	RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi);
1355 	RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo);
1356 	if (BNX_IS_5717_PLUS(sc)) {
1357 		RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_SEND_RING_5717);
1358 	} else {
1359 		RCB_WRITE_4(sc, vrcb, bge_nicaddr,
1360 		    BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT));
1361 	}
1362 	RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1363 	    BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0));
1364 
1365 	/*
1366 	 * Disable all receive return rings by setting the
1367 	 * 'ring disabled' bit in the flags field of all the receive
1368 	 * return ring control blocks, located in NIC memory.
1369 	 */
1370 	if (BNX_IS_5717_PLUS(sc)) {
1371 		/* Should be 17, use 16 until we get an SRAM map. */
1372 		limit = 16;
1373 	} else if (BNX_IS_57765_FAMILY(sc) ||
1374 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1375 		limit = 4;
1376 	} else {
1377 		limit = 1;
1378 	}
1379 	/* Disable all receive return rings. */
1380 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1381 	for (i = 0; i < limit; i++) {
1382 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0);
1383 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0);
1384 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1385 		    BGE_RCB_FLAG_RING_DISABLED);
1386 		RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0);
1387 		bnx_writembx(sc, BGE_MBX_RX_CONS0_LO +
1388 		    (i * (sizeof(uint64_t))), 0);
1389 		vrcb += sizeof(struct bge_rcb);
1390 	}
1391 
1392 	/*
1393 	 * Set up receive return ring 0.  Note that the NIC address
1394 	 * for RX return rings is 0x0.  The return rings live entirely
1395 	 * within the host, so the nicaddr field in the RCB isn't used.
1396 	 */
1397 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1398 	BGE_HOSTADDR(taddr, ret->bnx_rx_ret_ring_paddr);
1399 	RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi);
1400 	RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo);
1401 	RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0);
1402 	RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1403 	    BGE_RCB_MAXLEN_FLAGS(BNX_RETURN_RING_CNT, 0));
1404 
1405 	/* Set random backoff seed for TX */
1406 	CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF,
1407 	    sc->arpcom.ac_enaddr[0] + sc->arpcom.ac_enaddr[1] +
1408 	    sc->arpcom.ac_enaddr[2] + sc->arpcom.ac_enaddr[3] +
1409 	    sc->arpcom.ac_enaddr[4] + sc->arpcom.ac_enaddr[5] +
1410 	    BGE_TX_BACKOFF_SEED_MASK);
1411 
1412 	/* Set inter-packet gap */
1413 	val = 0x2620;
1414 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1415 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1416 		val |= CSR_READ_4(sc, BGE_TX_LENGTHS) &
1417 		    (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK);
1418 	}
1419 	CSR_WRITE_4(sc, BGE_TX_LENGTHS, val);
1420 
1421 	/*
1422 	 * Specify which ring to use for packets that don't match
1423 	 * any RX rules.
1424 	 */
1425 	CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08);
1426 
1427 	/*
1428 	 * Configure number of RX lists. One interrupt distribution
1429 	 * list, sixteen active lists, one bad frames class.
1430 	 */
1431 	CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181);
1432 
1433 	/* Initialize RX list placement stats mask. */
1434 	CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF);
1435 	CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1);
1436 
1437 	/* Disable host coalescing until we get it set up */
1438 	CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000);
1439 
1440 	/* Poll to make sure it's shut down. */
1441 	for (i = 0; i < BNX_TIMEOUT; i++) {
1442 		if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE))
1443 			break;
1444 		DELAY(10);
1445 	}
1446 
1447 	if (i == BNX_TIMEOUT) {
1448 		if_printf(&sc->arpcom.ac_if,
1449 			  "host coalescing engine failed to idle\n");
1450 		return(ENXIO);
1451 	}
1452 
1453 	/* Set up host coalescing defaults */
1454 	CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, sc->bnx_rx_coal_ticks);
1455 	CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, sc->bnx_tx_coal_ticks);
1456 	CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bnx_rx_coal_bds);
1457 	CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bnx_tx_coal_bds);
1458 	CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, sc->bnx_rx_coal_bds_int);
1459 	CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, sc->bnx_tx_coal_bds_int);
1460 
1461 	/* Set up address of status block */
1462 	bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1463 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI,
1464 	    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1465 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO,
1466 	    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1467 
1468 	/* Set up status block partial update size. */
1469 	val = BGE_STATBLKSZ_32BYTE;
1470 #if 0
1471 	/*
1472 	 * Does not seem to have a visible effect in either
1473 	 * bulk data (1472B UDP datagram) or tiny data
1474 	 * (18B UDP datagram) TX tests.
1475 	 */
1476 	val |= BGE_HCCMODE_CLRTICK_TX;
1477 #endif
1478 	/* Turn on host coalescing state machine */
1479 	CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE);
1480 
1481 	/* Turn on RX BD completion state machine and enable attentions */
1482 	CSR_WRITE_4(sc, BGE_RBDC_MODE,
1483 	    BGE_RBDCMODE_ENABLE|BGE_RBDCMODE_ATTN);
1484 
1485 	/* Turn on RX list placement state machine */
1486 	CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
1487 
1488 	val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB |
1489 	    BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR |
1490 	    BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB |
1491 	    BGE_MACMODE_FRMHDR_DMA_ENB;
1492 
1493 	if (sc->bnx_flags & BNX_FLAG_TBI)
1494 		val |= BGE_PORTMODE_TBI;
1495 	else if (sc->bnx_flags & BNX_FLAG_MII_SERDES)
1496 		val |= BGE_PORTMODE_GMII;
1497 	else
1498 		val |= BGE_PORTMODE_MII;
1499 
1500 	/* Turn on DMA, clear stats */
1501 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
1502 
1503 	/* Set misc. local control, enable interrupts on attentions */
1504 	CSR_WRITE_4(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
1505 
1506 #ifdef notdef
1507 	/* Assert GPIO pins for PHY reset */
1508 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0|
1509 	    BGE_MLC_MISCIO_OUT1|BGE_MLC_MISCIO_OUT2);
1510 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0|
1511 	    BGE_MLC_MISCIO_OUTEN1|BGE_MLC_MISCIO_OUTEN2);
1512 #endif
1513 
1514 	/* Turn on write DMA state machine */
1515 	val = BGE_WDMAMODE_ENABLE|BGE_WDMAMODE_ALL_ATTNS;
1516 	/* Enable host coalescing bug fix. */
1517 	val |= BGE_WDMAMODE_STATUS_TAG_FIX;
1518 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5785) {
1519 		/* Request larger DMA burst size to get better performance. */
1520 		val |= BGE_WDMAMODE_BURST_ALL_DATA;
1521 	}
1522 	CSR_WRITE_4(sc, BGE_WDMA_MODE, val);
1523 	DELAY(40);
1524 
1525 	if (BNX_IS_57765_PLUS(sc)) {
1526 		uint32_t dmactl, dmactl_reg;
1527 
1528 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1529 			dmactl_reg = BGE_RDMA_RSRVCTRL2;
1530 		else
1531 			dmactl_reg = BGE_RDMA_RSRVCTRL;
1532 
1533 		dmactl = CSR_READ_4(sc, dmactl_reg);
1534 		/*
1535 		 * Adjust tx margin to prevent TX data corruption and
1536 		 * fix internal FIFO overflow.
1537 		 */
1538 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1539 		    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1540 		    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1541 			dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK |
1542 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK |
1543 			    BGE_RDMA_RSRVCTRL_TXMRGN_MASK);
1544 			dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K |
1545 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K |
1546 			    BGE_RDMA_RSRVCTRL_TXMRGN_320B;
1547 		}
1548 		/*
1549 		 * Enable fix for read DMA FIFO overruns.
1550 		 * The fix is to limit the number of RX BDs
1551 		 * the hardware would fetch at a time.
1552 		 */
1553 		CSR_WRITE_4(sc, dmactl_reg,
1554 		    dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
1555 	}
1556 
1557 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719) {
1558 		CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1559 		    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1560 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K |
1561 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1562 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1563 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1564 		uint32_t ctrl_reg;
1565 
1566 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1567 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL2;
1568 		else
1569 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL;
1570 
1571 		/*
1572 		 * Allow 4KB burst length reads for non-LSO frames.
1573 		 * Enable 512B burst length reads for buffer descriptors.
1574 		 */
1575 		CSR_WRITE_4(sc, ctrl_reg,
1576 		    CSR_READ_4(sc, ctrl_reg) |
1577 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 |
1578 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1579 	}
1580 
1581 	/* Turn on read DMA state machine */
1582 	val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS;
1583 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717)
1584 		val |= BGE_RDMAMODE_MULT_DMA_RD_DIS;
1585 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5784 ||
1586 	    sc->bnx_asicrev == BGE_ASICREV_BCM5785 ||
1587 	    sc->bnx_asicrev == BGE_ASICREV_BCM57780) {
1588 		val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN |
1589 		    BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN |
1590 		    BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN;
1591 	}
1592 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1593 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1594 		val |= CSR_READ_4(sc, BGE_RDMA_MODE) &
1595 		    BGE_RDMAMODE_H2BNC_VLAN_DET;
1596 		/*
1597 		 * Allow multiple outstanding read requests from
1598 		 * non-LSO read DMA engine.
1599 		 */
1600 		val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS;
1601 	}
1602 	if (sc->bnx_asicrev == BGE_ASICREV_BCM57766)
1603 		val |= BGE_RDMAMODE_JMB_2K_MMRR;
1604 	if (sc->bnx_flags & BNX_FLAG_TSO)
1605 		val |= BGE_RDMAMODE_TSO4_ENABLE;
1606 	val |= BGE_RDMAMODE_FIFO_LONG_BURST;
1607 	CSR_WRITE_4(sc, BGE_RDMA_MODE, val);
1608 	DELAY(40);
1609 
1610 	/* Turn on RX data completion state machine */
1611 	CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
1612 
1613 	/* Turn on RX BD initiator state machine */
1614 	CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
1615 
1616 	/* Turn on RX data and RX BD initiator state machine */
1617 	CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE);
1618 
1619 	/* Turn on send BD completion state machine */
1620 	CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
1621 
1622 	/* Turn on send data completion state machine */
1623 	val = BGE_SDCMODE_ENABLE;
1624 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5761)
1625 		val |= BGE_SDCMODE_CDELAY;
1626 	CSR_WRITE_4(sc, BGE_SDC_MODE, val);
1627 
1628 	/* Turn on send data initiator state machine */
1629 	if (sc->bnx_flags & BNX_FLAG_TSO) {
1630 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE |
1631 		    BGE_SDIMODE_HW_LSO_PRE_DMA);
1632 	} else {
1633 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
1634 	}
1635 
1636 	/* Turn on send BD initiator state machine */
1637 	CSR_WRITE_4(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
1638 
1639 	/* Turn on send BD selector state machine */
1640 	CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
1641 
1642 	CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF);
1643 	CSR_WRITE_4(sc, BGE_SDI_STATS_CTL,
1644 	    BGE_SDISTATSCTL_ENABLE|BGE_SDISTATSCTL_FASTER);
1645 
1646 	/* ack/clear link change events */
1647 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1648 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1649 	    BGE_MACSTAT_LINK_CHANGED);
1650 	CSR_WRITE_4(sc, BGE_MI_STS, 0);
1651 
1652 	/*
1653 	 * Enable attention when the link has changed state for
1654 	 * devices that use auto polling.
1655 	 */
1656 	if (sc->bnx_flags & BNX_FLAG_TBI) {
1657 		CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK);
1658 	} else {
1659 		if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
1660 			CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
1661 			DELAY(80);
1662 		}
1663 	}
1664 
1665 	/*
1666 	 * Clear any pending link state attention.
1667 	 * Otherwise some link state change events may be lost until attention
1668 	 * is cleared by bnx_intr() -> bnx_softc.bnx_link_upd() sequence.
1669 	 * It's not necessary on newer BCM chips - perhaps enabling link
1670 	 * state change attentions implies clearing pending attention.
1671 	 */
1672 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1673 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1674 	    BGE_MACSTAT_LINK_CHANGED);
1675 
1676 	/* Enable link state change attentions. */
1677 	BNX_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED);
1678 
1679 	return(0);
1680 }
1681 
1682 /*
1683  * Probe for a Broadcom chip. Check the PCI vendor and device IDs
1684  * against our list and return its name if we find a match. Note
1685  * that since the Broadcom controller contains VPD support, we
1686  * can get the device name string from the controller itself instead
1687  * of the compiled-in string. This is a little slow, but it guarantees
1688  * we'll always announce the right product name.
1689  */
1690 static int
1691 bnx_probe(device_t dev)
1692 {
1693 	const struct bnx_type *t;
1694 	uint16_t product, vendor;
1695 
1696 	if (!pci_is_pcie(dev))
1697 		return ENXIO;
1698 
1699 	product = pci_get_device(dev);
1700 	vendor = pci_get_vendor(dev);
1701 
1702 	for (t = bnx_devs; t->bnx_name != NULL; t++) {
1703 		if (vendor == t->bnx_vid && product == t->bnx_did)
1704 			break;
1705 	}
1706 	if (t->bnx_name == NULL)
1707 		return ENXIO;
1708 
1709 	device_set_desc(dev, t->bnx_name);
1710 	return 0;
1711 }
1712 
1713 static int
1714 bnx_attach(device_t dev)
1715 {
1716 	struct ifnet *ifp;
1717 	struct bnx_softc *sc;
1718 	struct bnx_rx_std_ring *std;
1719 	uint32_t hwcfg = 0;
1720 	int error = 0, rid, capmask, i, std_cpuid, std_cpuid_def;
1721 	uint8_t ether_addr[ETHER_ADDR_LEN];
1722 	uint16_t product;
1723 	uintptr_t mii_priv = 0;
1724 #ifdef BNX_TSO_DEBUG
1725 	char desc[32];
1726 #endif
1727 #ifdef IFPOLL_ENABLE
1728 	int offset, offset_def;
1729 #endif
1730 
1731 	sc = device_get_softc(dev);
1732 	sc->bnx_dev = dev;
1733 	callout_init_mp(&sc->bnx_tick_timer);
1734 	lwkt_serialize_init(&sc->bnx_jslot_serializer);
1735 	lwkt_serialize_init(&sc->bnx_main_serialize);
1736 
1737 	product = pci_get_device(dev);
1738 
1739 #ifndef BURN_BRIDGES
1740 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
1741 		uint32_t irq, mem;
1742 
1743 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
1744 		mem = pci_read_config(dev, BGE_PCI_BAR0, 4);
1745 
1746 		device_printf(dev, "chip is in D%d power mode "
1747 		    "-- setting to D0\n", pci_get_powerstate(dev));
1748 
1749 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
1750 
1751 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
1752 		pci_write_config(dev, BGE_PCI_BAR0, mem, 4);
1753 	}
1754 #endif	/* !BURN_BRIDGES */
1755 
1756 	/*
1757 	 * Map control/status registers.
1758 	 */
1759 	pci_enable_busmaster(dev);
1760 
1761 	rid = BGE_PCI_BAR0;
1762 	sc->bnx_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1763 	    RF_ACTIVE);
1764 
1765 	if (sc->bnx_res == NULL) {
1766 		device_printf(dev, "couldn't map memory\n");
1767 		return ENXIO;
1768 	}
1769 
1770 	sc->bnx_btag = rman_get_bustag(sc->bnx_res);
1771 	sc->bnx_bhandle = rman_get_bushandle(sc->bnx_res);
1772 
1773 	/* Save various chip information */
1774 	sc->bnx_chipid =
1775 	    pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
1776 	    BGE_PCIMISCCTL_ASICREV_SHIFT;
1777 	if (BGE_ASICREV(sc->bnx_chipid) == BGE_ASICREV_USE_PRODID_REG) {
1778 		/* All chips having dedicated ASICREV register have CPMU */
1779 		sc->bnx_flags |= BNX_FLAG_CPMU;
1780 
1781 		switch (product) {
1782 		case PCI_PRODUCT_BROADCOM_BCM5717:
1783 		case PCI_PRODUCT_BROADCOM_BCM5717C:
1784 		case PCI_PRODUCT_BROADCOM_BCM5718:
1785 		case PCI_PRODUCT_BROADCOM_BCM5719:
1786 		case PCI_PRODUCT_BROADCOM_BCM5720_ALT:
1787 		case PCI_PRODUCT_BROADCOM_BCM5725:
1788 		case PCI_PRODUCT_BROADCOM_BCM5727:
1789 		case PCI_PRODUCT_BROADCOM_BCM5762:
1790 			sc->bnx_chipid = pci_read_config(dev,
1791 			    BGE_PCI_GEN2_PRODID_ASICREV, 4);
1792 			break;
1793 
1794 		case PCI_PRODUCT_BROADCOM_BCM57761:
1795 		case PCI_PRODUCT_BROADCOM_BCM57762:
1796 		case PCI_PRODUCT_BROADCOM_BCM57765:
1797 		case PCI_PRODUCT_BROADCOM_BCM57766:
1798 		case PCI_PRODUCT_BROADCOM_BCM57781:
1799 		case PCI_PRODUCT_BROADCOM_BCM57782:
1800 		case PCI_PRODUCT_BROADCOM_BCM57785:
1801 		case PCI_PRODUCT_BROADCOM_BCM57786:
1802 		case PCI_PRODUCT_BROADCOM_BCM57791:
1803 		case PCI_PRODUCT_BROADCOM_BCM57795:
1804 			sc->bnx_chipid = pci_read_config(dev,
1805 			    BGE_PCI_GEN15_PRODID_ASICREV, 4);
1806 			break;
1807 
1808 		default:
1809 			sc->bnx_chipid = pci_read_config(dev,
1810 			    BGE_PCI_PRODID_ASICREV, 4);
1811 			break;
1812 		}
1813 	}
1814 	if (sc->bnx_chipid == BGE_CHIPID_BCM5717_C0)
1815 		sc->bnx_chipid = BGE_CHIPID_BCM5720_A0;
1816 
1817 	sc->bnx_asicrev = BGE_ASICREV(sc->bnx_chipid);
1818 	sc->bnx_chiprev = BGE_CHIPREV(sc->bnx_chipid);
1819 
1820 	switch (sc->bnx_asicrev) {
1821 	case BGE_ASICREV_BCM5717:
1822 	case BGE_ASICREV_BCM5719:
1823 	case BGE_ASICREV_BCM5720:
1824 		sc->bnx_flags |= BNX_FLAG_5717_PLUS | BNX_FLAG_57765_PLUS;
1825 		break;
1826 
1827 	case BGE_ASICREV_BCM5762:
1828 		sc->bnx_flags |= BNX_FLAG_57765_PLUS;
1829 		break;
1830 
1831 	case BGE_ASICREV_BCM57765:
1832 	case BGE_ASICREV_BCM57766:
1833 		sc->bnx_flags |= BNX_FLAG_57765_FAMILY | BNX_FLAG_57765_PLUS;
1834 		break;
1835 	}
1836 
1837 	sc->bnx_flags |= BNX_FLAG_TSO;
1838 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 &&
1839 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
1840 		sc->bnx_flags &= ~BNX_FLAG_TSO;
1841 
1842 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1843 	    BNX_IS_57765_FAMILY(sc)) {
1844 		/*
1845 		 * All chips in the BCM57785 and BCM5718 families have a bug:
1846 		 * under certain situations the interrupt will not be enabled
1847 		 * even if a status tag is written to the interrupt mailbox.
1848 		 *
1849 		 * BCM5719 and BCM5720 have a hardware workaround that
1850 		 * fixes the above bug.
1851 		 * See the comment near BGE_PCIDMARWCTL_TAGGED_STATUS_WA in
1852 		 * bnx_chipinit().
1853 		 *
1854 		 * For the rest of the chips in these two families, we will
1855 		 * have to poll the status block at a high rate (10ms currently)
1856 		 * to check whether the interrupt is hosed or not.
1857 		 * See bnx_check_intr() for details.
1858 		 */
1859 		sc->bnx_flags |= BNX_FLAG_STATUSTAG_BUG;
1860 	}
1861 
1862 	sc->bnx_pciecap = pci_get_pciecap_ptr(sc->bnx_dev);
1863 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1864 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720)
1865 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_2048);
1866 	else
1867 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
1868 	device_printf(dev, "CHIP ID 0x%08x; "
1869 		      "ASIC REV 0x%02x; CHIP REV 0x%02x\n",
1870 		      sc->bnx_chipid, sc->bnx_asicrev, sc->bnx_chiprev);
1871 
1872 	/*
1873 	 * Set various PHY quirk flags.
1874 	 */
1875 
1876 	capmask = MII_CAPMASK_DEFAULT;
1877 	if (product == PCI_PRODUCT_BROADCOM_BCM57791 ||
1878 	    product == PCI_PRODUCT_BROADCOM_BCM57795) {
1879 		/* 10/100 only */
1880 		capmask &= ~BMSR_EXTSTAT;
1881 	}
1882 
1883 	mii_priv |= BRGPHY_FLAG_WIRESPEED;
1884 	if (sc->bnx_chipid == BGE_CHIPID_BCM5762_A0)
1885 		mii_priv |= BRGPHY_FLAG_5762_A0;
1886 
1887 	/* Initialize if_name earlier, so if_printf could be used */
1888 	ifp = &sc->arpcom.ac_if;
1889 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1890 
1891 	/* Try to reset the chip. */
1892 	bnx_reset(sc);
1893 
1894 	if (bnx_chipinit(sc)) {
1895 		device_printf(dev, "chip initialization failed\n");
1896 		error = ENXIO;
1897 		goto fail;
1898 	}
1899 
1900 	/*
1901 	 * Get station address
1902 	 */
1903 	error = bnx_get_eaddr(sc, ether_addr);
1904 	if (error) {
1905 		device_printf(dev, "failed to read station address\n");
1906 		goto fail;
1907 	}
1908 
1909 	/* XXX */
1910 	sc->bnx_tx_ringcnt = 1;
1911 	sc->bnx_rx_retcnt = 1;
1912 	sc->bnx_intr_cnt = 1;
1913 
1914 	if ((sc->bnx_rx_retcnt == 1 && sc->bnx_tx_ringcnt == 1) ||
1915 	    (sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt > 1)) {
1916 		/*
1917 		 * The RX ring and the corresponding TX ring processing
1918 		 * should be on the same CPU, since they share the same
1919 		 * status block.
1920 		 */
1921 		sc->bnx_flags |= BNX_FLAG_RXTX_BUNDLE;
1922 		if (bootverbose)
1923 			device_printf(dev, "RX/TX bundle\n");
1924 	} else {
1925 		KKASSERT(sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt == 1);
1926 	}
1927 
1928 	error = bnx_dma_alloc(dev);
1929 	if (error)
1930 		goto fail;
1931 
1932 #ifdef IFPOLL_ENABLE
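	/*
	 * The npoll offsets control which CPUs the RX return rings and
	 * TX rings are polled on: ring i is serviced on CPU (i + offset),
	 * see bnx_npoll() below.  The defaults computed here may be
	 * overridden through the npoll.offset, npoll.rxoff and
	 * npoll.txoff device tunables read by device_getenv_int().
	 */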
1933 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
1934 		/*
1935 		 * NPOLLING RX/TX CPU offset
1936 		 */
1937 		if (sc->bnx_rx_retcnt == ncpus2) {
1938 			offset = 0;
1939 		} else {
1940 			offset_def =
1941 			(sc->bnx_rx_retcnt * device_get_unit(dev)) % ncpus2;
1942 			offset = device_getenv_int(dev, "npoll.offset",
1943 			    offset_def);
1944 			if (offset >= ncpus2 ||
1945 			    offset % sc->bnx_rx_retcnt != 0) {
1946 				device_printf(dev, "invalid npoll.offset %d, "
1947 				    "use %d\n", offset, offset_def);
1948 				offset = offset_def;
1949 			}
1950 		}
1951 		sc->bnx_npoll_rxoff = offset;
1952 		sc->bnx_npoll_txoff = offset;
1953 	} else {
1954 		/*
1955 		 * NPOLLING RX CPU offset
1956 		 */
1957 		if (sc->bnx_rx_retcnt == ncpus2) {
1958 			offset = 0;
1959 		} else {
1960 			offset_def =
1961 			(sc->bnx_rx_retcnt * device_get_unit(dev)) % ncpus2;
1962 			offset = device_getenv_int(dev, "npoll.rxoff",
1963 			    offset_def);
1964 			if (offset >= ncpus2 ||
1965 			    offset % sc->bnx_rx_retcnt != 0) {
1966 				device_printf(dev, "invalid npoll.rxoff %d, "
1967 				    "use %d\n", offset, offset_def);
1968 				offset = offset_def;
1969 			}
1970 		}
1971 		sc->bnx_npoll_rxoff = offset;
1972 
1973 		/*
1974 		 * NPOLLING TX CPU offset
1975 		 */
1976 		offset_def = device_get_unit(dev) % ncpus2;
1977 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
1978 		if (offset >= ncpus2) {
1979 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
1980 			    offset, offset_def);
1981 			offset = offset_def;
1982 		}
1983 		sc->bnx_npoll_txoff = offset;
1984 	}
1985 #endif	/* IFPOLL_ENABLE */
1986 
1987 	/*
1988 	 * Allocate interrupt
1989 	 */
1990 	error = bnx_alloc_intr(sc);
1991 	if (error)
1992 		goto fail;
1993 
1994 	/* Setup serializers */
1995 	bnx_setup_serialize(sc);
1996 
1997 	/* Set default tuneable values. */
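	/* Adjustable at runtime via the *_coal_* sysctls created below. */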
1998 	sc->bnx_rx_coal_ticks = BNX_RX_COAL_TICKS_DEF;
1999 	sc->bnx_tx_coal_ticks = BNX_TX_COAL_TICKS_DEF;
2000 	sc->bnx_rx_coal_bds = BNX_RX_COAL_BDS_DEF;
2001 	sc->bnx_tx_coal_bds = BNX_TX_COAL_BDS_DEF;
2002 	sc->bnx_rx_coal_bds_int = BNX_RX_COAL_BDS_INT_DEF;
2003 	sc->bnx_tx_coal_bds_int = BNX_TX_COAL_BDS_INT_DEF;
2004 
2005 	/* Set up ifnet structure */
2006 	ifp->if_softc = sc;
2007 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2008 	ifp->if_ioctl = bnx_ioctl;
2009 	ifp->if_start = bnx_start;
2010 #ifdef IFPOLL_ENABLE
2011 	ifp->if_npoll = bnx_npoll;
2012 #endif
2013 	ifp->if_init = bnx_init;
2014 	ifp->if_serialize = bnx_serialize;
2015 	ifp->if_deserialize = bnx_deserialize;
2016 	ifp->if_tryserialize = bnx_tryserialize;
2017 #ifdef INVARIANTS
2018 	ifp->if_serialize_assert = bnx_serialize_assert;
2019 #endif
2020 	ifp->if_mtu = ETHERMTU;
2021 	ifp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2022 
2023 	ifp->if_capabilities |= IFCAP_HWCSUM;
2024 	ifp->if_hwassist = BNX_CSUM_FEATURES;
2025 	if (sc->bnx_flags & BNX_FLAG_TSO) {
2026 		ifp->if_capabilities |= IFCAP_TSO;
2027 		ifp->if_hwassist |= CSUM_TSO;
2028 	}
2029 	ifp->if_capenable = ifp->if_capabilities;
2030 
2031 	ifq_set_maxlen(&ifp->if_snd, BGE_TX_RING_CNT - 1);
2032 	ifq_set_ready(&ifp->if_snd);
2033 	ifq_set_subq_cnt(&ifp->if_snd, sc->bnx_tx_ringcnt);
2034 
2035 	/*
2036 	 * Figure out what sort of media we have by checking the
2037 	 * hardware config word in the first 32k of NIC internal memory,
2038 	 * or fall back to examining the EEPROM if necessary.
2039 	 * Note: on some BCM5700 cards, this value appears to be unset.
2040 	 * If that's the case, we have to rely on identifying the NIC
2041 	 * by its PCI subsystem ID, as we do below for the SysKonnect
2042 	 * SK-9D41.
2043 	 */
2044 	if (bnx_readmem_ind(sc, BGE_SOFTWARE_GENCOMM_SIG) == BGE_MAGIC_NUMBER) {
2045 		hwcfg = bnx_readmem_ind(sc, BGE_SOFTWARE_GENCOMM_NICCFG);
2046 	} else {
2047 		if (bnx_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET,
2048 				    sizeof(hwcfg))) {
2049 			device_printf(dev, "failed to read EEPROM\n");
2050 			error = ENXIO;
2051 			goto fail;
2052 		}
2053 		hwcfg = ntohl(hwcfg);
2054 	}
2055 
2056 	/* The SysKonnect SK-9D41 is a 1000baseSX card. */
2057 	if (pci_get_subvendor(dev) == PCI_PRODUCT_SCHNEIDERKOCH_SK_9D41 ||
2058 	    (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER)
2059 		sc->bnx_flags |= BNX_FLAG_TBI;
2060 
2061 	/* Setup MI MODE */
2062 	if (sc->bnx_flags & BNX_FLAG_CPMU)
2063 		sc->bnx_mi_mode = BGE_MIMODE_500KHZ_CONST;
2064 	else
2065 		sc->bnx_mi_mode = BGE_MIMODE_BASE;
2066 
2067 	/* Setup link status update stuffs */
2068 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2069 		sc->bnx_link_upd = bnx_tbi_link_upd;
2070 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2071 	} else if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
2072 		sc->bnx_link_upd = bnx_autopoll_link_upd;
2073 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2074 	} else {
2075 		sc->bnx_link_upd = bnx_copper_link_upd;
2076 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2077 	}
2078 
2079 	/* Set default PHY address */
2080 	sc->bnx_phyno = 1;
2081 
2082 	/*
2083 	 * PHY address mapping for various devices.
2084 	 *
2085 	 *          | F0 Cu | F0 Sr | F1 Cu | F1 Sr |
2086 	 * ---------+-------+-------+-------+-------+
2087 	 * BCM57XX  |   1   |   X   |   X   |   X   |
2088 	 * BCM5704  |   1   |   X   |   1   |   X   |
2089 	 * BCM5717  |   1   |   8   |   2   |   9   |
2090 	 * BCM5719  |   1   |   8   |   2   |   9   |
2091 	 * BCM5720  |   1   |   8   |   2   |   9   |
2092 	 *
2093 	 * Other addresses may respond but they are not
2094 	 * IEEE compliant PHYs and should be ignored.
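	 *
	 * For example, PCI function 1 on a BCM5719 strapped for SerDes
	 * maps to PHY address 9 (the F1 Sr column above).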
2095 	 */
2096 	if (BNX_IS_5717_PLUS(sc)) {
2097 		int f;
2098 
2099 		f = pci_get_function(dev);
2100 		if (sc->bnx_chipid == BGE_CHIPID_BCM5717_A0) {
2101 			if (CSR_READ_4(sc, BGE_SGDIG_STS) &
2102 			    BGE_SGDIGSTS_IS_SERDES)
2103 				sc->bnx_phyno = f + 8;
2104 			else
2105 				sc->bnx_phyno = f + 1;
2106 		} else {
2107 			if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) &
2108 			    BGE_CPMU_PHY_STRAP_IS_SERDES)
2109 				sc->bnx_phyno = f + 8;
2110 			else
2111 				sc->bnx_phyno = f + 1;
2112 		}
2113 	}
2114 
2115 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2116 		ifmedia_init(&sc->bnx_ifmedia, IFM_IMASK,
2117 		    bnx_ifmedia_upd, bnx_ifmedia_sts);
2118 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL);
2119 		ifmedia_add(&sc->bnx_ifmedia,
2120 		    IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL);
2121 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
2122 		ifmedia_set(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO);
2123 		sc->bnx_ifmedia.ifm_media = sc->bnx_ifmedia.ifm_cur->ifm_media;
2124 	} else {
2125 		struct mii_probe_args mii_args;
2126 
2127 		mii_probe_args_init(&mii_args, bnx_ifmedia_upd, bnx_ifmedia_sts);
2128 		mii_args.mii_probemask = 1 << sc->bnx_phyno;
2129 		mii_args.mii_capmask = capmask;
2130 		mii_args.mii_privtag = MII_PRIVTAG_BRGPHY;
2131 		mii_args.mii_priv = mii_priv;
2132 
2133 		error = mii_probe(dev, &sc->bnx_miibus, &mii_args);
2134 		if (error) {
2135 			device_printf(dev, "MII without any PHY!\n");
2136 			goto fail;
2137 		}
2138 	}
2139 
2140 	/*
2141 	 * Create sysctl nodes.
2142 	 */
2143 	sysctl_ctx_init(&sc->bnx_sysctl_ctx);
2144 	sc->bnx_sysctl_tree = SYSCTL_ADD_NODE(&sc->bnx_sysctl_ctx,
2145 					      SYSCTL_STATIC_CHILDREN(_hw),
2146 					      OID_AUTO,
2147 					      device_get_nameunit(dev),
2148 					      CTLFLAG_RD, 0, "");
2149 	if (sc->bnx_sysctl_tree == NULL) {
2150 		device_printf(dev, "can't add sysctl node\n");
2151 		error = ENXIO;
2152 		goto fail;
2153 	}
2154 
2155 	SYSCTL_ADD_INT(&sc->bnx_sysctl_ctx,
2156 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2157 	    "rx_rings", CTLFLAG_RD, &sc->bnx_rx_retcnt, 0, "# of RX rings");
2158 	SYSCTL_ADD_INT(&sc->bnx_sysctl_ctx,
2159 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2160 	    "tx_rings", CTLFLAG_RD, &sc->bnx_tx_ringcnt, 0, "# of TX rings");
2161 
2162 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2163 			SYSCTL_CHILDREN(sc->bnx_sysctl_tree),
2164 			OID_AUTO, "rx_coal_ticks",
2165 			CTLTYPE_INT | CTLFLAG_RW,
2166 			sc, 0, bnx_sysctl_rx_coal_ticks, "I",
2167 			"Receive coalescing ticks (usec).");
2168 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2169 			SYSCTL_CHILDREN(sc->bnx_sysctl_tree),
2170 			OID_AUTO, "tx_coal_ticks",
2171 			CTLTYPE_INT | CTLFLAG_RW,
2172 			sc, 0, bnx_sysctl_tx_coal_ticks, "I",
2173 			"Transmit coalescing ticks (usec).");
2174 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2175 			SYSCTL_CHILDREN(sc->bnx_sysctl_tree),
2176 			OID_AUTO, "rx_coal_bds",
2177 			CTLTYPE_INT | CTLFLAG_RW,
2178 			sc, 0, bnx_sysctl_rx_coal_bds, "I",
2179 			"Receive max coalesced BD count.");
2180 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2181 			SYSCTL_CHILDREN(sc->bnx_sysctl_tree),
2182 			OID_AUTO, "tx_coal_bds",
2183 			CTLTYPE_INT | CTLFLAG_RW,
2184 			sc, 0, bnx_sysctl_tx_coal_bds, "I",
2185 			"Transmit max coalesced BD count.");
2186 	/*
2187 	 * A common design characteristic for many Broadcom
2188 	 * client controllers is that they only support a
2189 	 * single outstanding DMA read operation on the PCIe
2190 	 * bus. This means that it will take twice as long to
2191 	 * fetch a TX frame that is split into header and
2192 	 * payload buffers as it does to fetch a single,
2193 	 * contiguous TX frame (2 reads vs. 1 read). For these
2194 	 * controllers, coalescing buffers to reduce the number
2195 	 * of memory reads is an effective way to get maximum
2196 	 * performance (about 940Mbps).  Without collapsing TX
2197 	 * buffers the maximum TCP bulk transfer performance
2198 	 * is about 850Mbps.  However, forcing mbufs to be
2199 	 * coalesced consumes a lot of CPU cycles, so it is
2200 	 * left off by default.
2201 	 */
2202 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2203 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2204 	    "force_defrag", CTLTYPE_INT | CTLFLAG_RW,
2205 	    sc, 0, bnx_sysctl_force_defrag, "I",
2206 	    "Force defragment on TX path");
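	/*
	 * The tree is rooted at hw.<nameunit>, so e.g.
	 * "sysctl hw.bnx0.force_defrag=1" (assuming the first device)
	 * enables the TX defragmentation described above at runtime.
	 */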
2207 
2208 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2209 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2210 	    "tx_wreg", CTLTYPE_INT | CTLFLAG_RW,
2211 	    sc, 0, bnx_sysctl_tx_wreg, "I",
2212 	    "# of segments before writing to hardware register");
2213 
2214 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2215 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2216 	    "std_refill", CTLTYPE_INT | CTLFLAG_RW,
2217 	    sc, 0, bnx_sysctl_std_refill, "I",
2218 	    "# of packets received before scheduling standard refilling");
2219 
2220 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2221 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2222 	    "rx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2223 	    sc, 0, bnx_sysctl_rx_coal_bds_int, "I",
2224 	    "Receive max coalesced BD count during interrupt.");
2225 	SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2226 	    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2227 	    "tx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2228 	    sc, 0, bnx_sysctl_tx_coal_bds_int, "I",
2229 	    "Transmit max coalesced BD count during interrupt.");
2230 
2231 #ifdef IFPOLL_ENABLE
2232 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
2233 		SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2234 		    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2235 		    "npoll_offset", CTLTYPE_INT | CTLFLAG_RW,
2236 		    sc, 0, bnx_sysctl_npoll_offset, "I",
2237 		    "NPOLLING cpu offset");
2238 	} else {
2239 		SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2240 		    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2241 		    "npoll_rxoff", CTLTYPE_INT | CTLFLAG_RW,
2242 		    sc, 0, bnx_sysctl_npoll_rxoff, "I",
2243 		    "NPOLLING RX cpu offset");
2244 		SYSCTL_ADD_PROC(&sc->bnx_sysctl_ctx,
2245 		    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2246 		    "npoll_txoff", CTLTYPE_INT | CTLFLAG_RW,
2247 		    sc, 0, bnx_sysctl_npoll_txoff, "I",
2248 		    "NPOLLING TX cpu offset");
2249 	}
2250 #endif
2251 
2252 #ifdef BNX_TSO_DEBUG
2253 	for (i = 0; i < BNX_TSO_NSTATS; ++i) {
2254 		ksnprintf(desc, sizeof(desc), "tso%d", i + 1);
2255 		SYSCTL_ADD_ULONG(&sc->bnx_sysctl_ctx,
2256 		    SYSCTL_CHILDREN(sc->bnx_sysctl_tree), OID_AUTO,
2257 		    desc, CTLFLAG_RW, &sc->bnx_tsosegs[i], "");
2258 	}
2259 #endif
2260 
2261 	/*
2262 	 * Call MI attach routine.
2263 	 */
2264 	ether_ifattach(ifp, ether_addr, NULL);
2265 
2266 	/* Setup TX rings and subqueues */
2267 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2268 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
2269 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
2270 
2271 		ifsq_set_cpuid(ifsq, txr->bnx_tx_cpuid);
2272 		ifsq_set_hw_serialize(ifsq, &txr->bnx_tx_serialize);
2273 		ifsq_set_priv(ifsq, txr);
2274 		txr->bnx_ifsq = ifsq;
2275 
2276 		ifsq_watchdog_init(&txr->bnx_tx_watchdog, ifsq, bnx_watchdog);
2277 	}
2278 
2279 	error = bnx_setup_intr(sc);
2280 	if (error) {
2281 		ether_ifdetach(ifp);
2282 		goto fail;
2283 	}
2284 	bnx_set_tick_cpuid(sc, FALSE);
2285 
2286 	/*
2287 	 * Create RX standard ring refilling thread
2288 	 */
2289 	std_cpuid_def = device_get_unit(dev) % ncpus;
2290 	std_cpuid = device_getenv_int(dev, "std.cpuid", std_cpuid_def);
2291 	if (std_cpuid < 0 || std_cpuid >= ncpus) {
2292 		device_printf(dev, "invalid std.cpuid %d, use %d\n",
2293 		    std_cpuid, std_cpuid_def);
2294 		std_cpuid = std_cpuid_def;
2295 	}
2296 
2297 	std = &sc->bnx_rx_std_ring;
2298 	lwkt_create(bnx_rx_std_refill_ithread, std, NULL,
2299 	    &std->bnx_rx_std_ithread, TDF_NOSTART | TDF_INTTHREAD, std_cpuid,
2300 	    "%s std", device_get_nameunit(dev));
2301 	lwkt_setpri(&std->bnx_rx_std_ithread, TDPRI_INT_MED);
2302 	std->bnx_rx_std_ithread.td_preemptable = lwkt_preempt;
2303 	sc->bnx_flags |= BNX_FLAG_STD_THREAD;
2304 
2305 	return(0);
2306 fail:
2307 	bnx_detach(dev);
2308 	return(error);
2309 }
2310 
2311 static int
2312 bnx_detach(device_t dev)
2313 {
2314 	struct bnx_softc *sc = device_get_softc(dev);
2315 
2316 	if (device_is_attached(dev)) {
2317 		struct ifnet *ifp = &sc->arpcom.ac_if;
2318 
2319 		ifnet_serialize_all(ifp);
2320 		bnx_stop(sc);
2321 		bnx_reset(sc);
2322 		bnx_teardown_intr(sc, sc->bnx_intr_cnt);
2323 		ifnet_deserialize_all(ifp);
2324 
2325 		ether_ifdetach(ifp);
2326 	}
2327 
2328 	if (sc->bnx_flags & BNX_FLAG_STD_THREAD) {
2329 		struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
2330 
2331 		tsleep_interlock(std, 0);
2332 		std->bnx_rx_std_stop = 1;
2333 		cpu_sfence();
2334 		lwkt_schedule(&std->bnx_rx_std_ithread);
2335 		tsleep(std, PINTERLOCKED, "bnx_detach", 0);
2336 		if (bootverbose)
2337 			device_printf(dev, "RX std ithread exited\n");
2338 	}
2339 
2340 	if (sc->bnx_flags & BNX_FLAG_TBI)
2341 		ifmedia_removeall(&sc->bnx_ifmedia);
2342 	if (sc->bnx_miibus)
2343 		device_delete_child(dev, sc->bnx_miibus);
2344 	bus_generic_detach(dev);
2345 
2346 	bnx_free_intr(sc);
2347 
2348 	if (sc->bnx_res != NULL) {
2349 		bus_release_resource(dev, SYS_RES_MEMORY,
2350 		    BGE_PCI_BAR0, sc->bnx_res);
2351 	}
2352 
2353 	if (sc->bnx_sysctl_tree != NULL)
2354 		sysctl_ctx_free(&sc->bnx_sysctl_ctx);
2355 
2356 	bnx_dma_free(sc);
2357 
2358 	if (sc->bnx_serialize != NULL)
2359 		kfree(sc->bnx_serialize, M_DEVBUF);
2360 
2361 	return 0;
2362 }
2363 
2364 static void
2365 bnx_reset(struct bnx_softc *sc)
2366 {
2367 	device_t dev;
2368 	uint32_t cachesize, command, pcistate, reset;
2369 	void (*write_op)(struct bnx_softc *, uint32_t, uint32_t);
2370 	int i, val = 0;
2371 	uint16_t devctl;
2372 
2373 	dev = sc->bnx_dev;
2374 
2375 	write_op = bnx_writemem_direct;
2376 
2377 	/* Save some important PCI state. */
2378 	cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4);
2379 	command = pci_read_config(dev, BGE_PCI_CMD, 4);
2380 	pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4);
2381 
2382 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2383 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2384 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2385 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2386 
2387 	/* Disable fastboot on controllers that support it. */
2388 	if (bootverbose)
2389 		if_printf(&sc->arpcom.ac_if, "Disabling fastboot\n");
2390 	CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0);
2391 
2392 	/*
2393 	 * Write the magic number to SRAM at offset 0xB50.
2394 	 * When firmware finishes its initialization it will
2395 	 * write ~BGE_MAGIC_NUMBER to the same location.
2396 	 */
2397 	bnx_writemem_ind(sc, BGE_SOFTWARE_GENCOMM, BGE_MAGIC_NUMBER);
2398 
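	/*
	 * The (65 << 1) value appears to program the misc. config
	 * timer prescaler for the 66MHz core clock (0x41 == 65).
	 */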
2399 	reset = BGE_MISCCFG_RESET_CORE_CLOCKS|(65<<1);
2400 
2401 	/* XXX: Broadcom Linux driver. */
2402 	/* Force PCI-E 1.0a mode */
2403 	if (!BNX_IS_57765_PLUS(sc) &&
2404 	    CSR_READ_4(sc, BGE_PCIE_PHY_TSTCTL) ==
2405 	    (BGE_PCIE_PHY_TSTCTL_PSCRAM |
2406 	     BGE_PCIE_PHY_TSTCTL_PCIE10)) {
2407 		CSR_WRITE_4(sc, BGE_PCIE_PHY_TSTCTL,
2408 		    BGE_PCIE_PHY_TSTCTL_PSCRAM);
2409 	}
2410 	if (sc->bnx_chipid != BGE_CHIPID_BCM5750_A0) {
2411 		/* Prevent PCIE link training during global reset */
2412 		CSR_WRITE_4(sc, BGE_MISC_CFG, (1<<29));
2413 		reset |= (1<<29);
2414 	}
2415 
2416 	/*
2417 	 * Set GPHY Power Down Override to leave GPHY
2418 	 * powered up in D0 uninitialized.
2419 	 */
2420 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0)
2421 		reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE;
2422 
2423 	/* Issue global reset */
2424 	write_op(sc, BGE_MISC_CFG, reset);
2425 
2426 	DELAY(1000);
2427 
2428 	/* XXX: Broadcom Linux driver. */
2429 	if (sc->bnx_chipid == BGE_CHIPID_BCM5750_A0) {
2430 		uint32_t v;
2431 
2432 		DELAY(500000); /* wait for link training to complete */
2433 		v = pci_read_config(dev, 0xc4, 4);
2434 		pci_write_config(dev, 0xc4, v | (1<<15), 4);
2435 	}
2436 
2437 	devctl = pci_read_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL, 2);
2438 
2439 	/* Disable no snoop and disable relaxed ordering. */
2440 	devctl &= ~(PCIEM_DEVCTL_RELAX_ORDER | PCIEM_DEVCTL_NOSNOOP);
2441 
2442 	/* Old PCI-E chips only support 128 bytes Max PayLoad Size. */
2443 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0) {
2444 		devctl &= ~PCIEM_DEVCTL_MAX_PAYLOAD_MASK;
2445 		devctl |= PCIEM_DEVCTL_MAX_PAYLOAD_128;
2446 	}
2447 
2448 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL,
2449 	    devctl, 2);
2450 
2451 	/* Clear error status. */
2452 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVSTS,
2453 	    PCIEM_DEVSTS_CORR_ERR |
2454 	    PCIEM_DEVSTS_NFATAL_ERR |
2455 	    PCIEM_DEVSTS_FATAL_ERR |
2456 	    PCIEM_DEVSTS_UNSUPP_REQ, 2);
2457 
2458 	/* Reset some of the PCI state that got zapped by reset */
2459 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2460 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2461 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2462 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2463 	pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4);
2464 	pci_write_config(dev, BGE_PCI_CMD, command, 4);
2465 	write_op(sc, BGE_MISC_CFG, (65 << 1));
2466 
2467 	/* Enable memory arbiter */
2468 	CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE);
2469 
2470 	/*
2471 	 * Poll until we see the 1's complement of the magic number.
2472 	 * This indicates that the firmware initialization is complete.
2473 	 */
2474 	for (i = 0; i < BNX_FIRMWARE_TIMEOUT; i++) {
2475 		val = bnx_readmem_ind(sc, BGE_SOFTWARE_GENCOMM);
2476 		if (val == ~BGE_MAGIC_NUMBER)
2477 			break;
2478 		DELAY(10);
2479 	}
2480 	if (i == BNX_FIRMWARE_TIMEOUT) {
2481 		if_printf(&sc->arpcom.ac_if, "firmware handshake "
2482 			  "timed out, found 0x%08x\n", val);
2483 	}
2484 
2485 	/* BCM57765 A0 needs additional time before accessing. */
2486 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
2487 		DELAY(10 * 1000);
2488 
2489 	/*
2490 	 * XXX Wait for the value of the PCISTATE register to
2491 	 * return to its original pre-reset state. This is a
2492 	 * fairly good indicator of reset completion. If we don't
2493 	 * wait for the reset to fully complete, trying to read
2494 	 * from the device's non-PCI registers may yield garbage
2495 	 * results.
2496 	 */
2497 	for (i = 0; i < BNX_TIMEOUT; i++) {
2498 		if (pci_read_config(dev, BGE_PCI_PCISTATE, 4) == pcistate)
2499 			break;
2500 		DELAY(10);
2501 	}
2502 
2503 	/* Fix up byte swapping */
2504 	CSR_WRITE_4(sc, BGE_MODE_CTL, bnx_dma_swap_options(sc));
2505 
2506 	CSR_WRITE_4(sc, BGE_MAC_MODE, 0);
2507 
2508 	/*
2509 	 * The 5704 in TBI mode apparently needs some special
2510 	 * adjustment to insure the SERDES drive level is set
2511 	 * adjustment to ensure the SERDES drive level is set
2512 	 */
2513 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5704 &&
2514 	    (sc->bnx_flags & BNX_FLAG_TBI)) {
2515 		uint32_t serdescfg;
2516 
2517 		serdescfg = CSR_READ_4(sc, BGE_SERDES_CFG);
2518 		serdescfg = (serdescfg & ~0xFFF) | 0x880;
2519 		CSR_WRITE_4(sc, BGE_SERDES_CFG, serdescfg);
2520 	}
2521 
2522 	CSR_WRITE_4(sc, BGE_MI_MODE,
2523 	    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
2524 	DELAY(80);
2525 
2526 	/* XXX: Broadcom Linux driver. */
2527 	if (!BNX_IS_57765_PLUS(sc)) {
2528 		uint32_t v;
2529 
2530 		/* Enable Data FIFO protection. */
2531 		v = CSR_READ_4(sc, BGE_PCIE_TLDLPL_PORT);
2532 		CSR_WRITE_4(sc, BGE_PCIE_TLDLPL_PORT, v | (1 << 25));
2533 	}
2534 
2535 	DELAY(10000);
2536 
2537 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
2538 		BNX_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE,
2539 		    CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
2540 	}
2541 }
2542 
2543 /*
2544  * Frame reception handling. This is called if there's a frame
2545  * on the receive return list.
2546  *
2547  * Note: we have to be able to handle two possibilities here:
2548  * 1) the frame is from the jumbo receive ring
2549  * 2) the frame is from the standard receive ring
2550  */
2551 
2552 static void
2553 bnx_rxeof(struct bnx_rx_ret_ring *ret, uint16_t rx_prod, int count)
2554 {
2555 	struct bnx_softc *sc = ret->bnx_sc;
2556 	struct bnx_rx_std_ring *std = ret->bnx_std;
2557 	struct ifnet *ifp = &sc->arpcom.ac_if;
2558 
2559 	while (ret->bnx_rx_saved_considx != rx_prod && count != 0) {
2560 		struct bge_rx_bd *cur_rx;
2561 		struct bnx_rx_buf *rb;
2562 		uint32_t rxidx;
2563 		struct mbuf *m = NULL;
2564 		uint16_t vlan_tag = 0;
2565 		int have_tag = 0;
2566 
2567 		--count;
2568 
2569 		cur_rx = &ret->bnx_rx_ret_ring[ret->bnx_rx_saved_considx];
2570 
2571 		rxidx = cur_rx->bge_idx;
2572 		BNX_INC(ret->bnx_rx_saved_considx, BNX_RETURN_RING_CNT);
2573 
2574 		if (cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) {
2575 			have_tag = 1;
2576 			vlan_tag = cur_rx->bge_vlan_tag;
2577 		}
2578 
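		/*
		 * Schedule standard ring refilling in batches: after every
		 * bnx_rx_cntmax frames, mark this return ring in the refill
		 * mask and wake the RX standard ring refill ithread.
		 */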
2579 		if (ret->bnx_rx_cnt >= ret->bnx_rx_cntmax) {
2580 			ret->bnx_rx_cnt = 0;
2581 			cpu_sfence();
2582 			atomic_set_int(&std->bnx_rx_std_refill,
2583 			    ret->bnx_rx_mask);
2584 			if (atomic_poll_acquire_int(&std->bnx_rx_std_running))
2585 				lwkt_schedule(&std->bnx_rx_std_ithread);
2586 		}
2587 		ret->bnx_rx_cnt++;
2588 
2589 		rb = &std->bnx_rx_std_buf[rxidx];
2590 		m = rb->bnx_rx_mbuf;
2591 		if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
2592 			IFNET_STAT_INC(ifp, ierrors, 1);
2593 			cpu_sfence();
2594 			rb->bnx_rx_refilled = 1;
2595 			continue;
2596 		}
2597 		if (bnx_newbuf_std(ret, rxidx, 0)) {
2598 			IFNET_STAT_INC(ifp, ierrors, 1);
2599 			continue;
2600 		}
2601 
2602 		IFNET_STAT_INC(ifp, ipackets, 1);
2603 		m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
2604 		m->m_pkthdr.rcvif = ifp;
2605 
2606 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2607 		    (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) {
2608 			if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) {
2609 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2610 				if ((cur_rx->bge_error_flag &
2611 				    BGE_RXERRFLAG_IP_CSUM_NOK) == 0)
2612 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2613 			}
2614 			if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) {
2615 				m->m_pkthdr.csum_data =
2616 				    cur_rx->bge_tcp_udp_csum;
2617 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2618 				    CSUM_PSEUDO_HDR;
2619 			}
2620 		}
2621 
2622 		/*
2623 		 * If we received a packet with a vlan tag, pass it
2624 		 * to vlan_input() instead of ether_input().
2625 		 */
2626 		if (have_tag) {
2627 			m->m_flags |= M_VLANTAG;
2628 			m->m_pkthdr.ether_vlantag = vlan_tag;
2629 		}
2630 		ifp->if_input(ifp, m);
2631 	}
2632 	bnx_writembx(sc, ret->bnx_rx_mbx, ret->bnx_rx_saved_considx);
2633 }
2634 
2635 static void
2636 bnx_txeof(struct bnx_tx_ring *txr, uint16_t tx_cons)
2637 {
2638 	struct ifnet *ifp = &txr->bnx_sc->arpcom.ac_if;
2639 
2640 	/*
2641 	 * Go through our tx ring and free mbufs for those
2642 	 * frames that have been sent.
2643 	 */
2644 	while (txr->bnx_tx_saved_considx != tx_cons) {
2645 		struct bnx_tx_buf *buf;
2646 		uint32_t idx = 0;
2647 
2648 		idx = txr->bnx_tx_saved_considx;
2649 		buf = &txr->bnx_tx_buf[idx];
2650 		if (buf->bnx_tx_mbuf != NULL) {
2651 			IFNET_STAT_INC(ifp, opackets, 1);
2652 			bus_dmamap_unload(txr->bnx_tx_mtag,
2653 			    buf->bnx_tx_dmamap);
2654 			m_freem(buf->bnx_tx_mbuf);
2655 			buf->bnx_tx_mbuf = NULL;
2656 		}
2657 		txr->bnx_tx_cnt--;
2658 		BNX_INC(txr->bnx_tx_saved_considx, BGE_TX_RING_CNT);
2659 	}
2660 
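	/*
	 * Clear the oactive flag once at least BNX_NSEG_RSVD +
	 * BNX_NSEG_SPARE descriptors are free again; this mirrors
	 * the check that set it in bnx_start().
	 */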
2661 	if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) >=
2662 	    (BNX_NSEG_RSVD + BNX_NSEG_SPARE))
2663 		ifsq_clr_oactive(txr->bnx_ifsq);
2664 
2665 	if (txr->bnx_tx_cnt == 0)
2666 		txr->bnx_tx_watchdog.wd_timer = 0;
2667 
2668 	if (!ifsq_is_empty(txr->bnx_ifsq))
2669 		ifsq_devstart(txr->bnx_ifsq);
2670 }
2671 
2672 #ifdef IFPOLL_ENABLE
2673 
2674 static void
2675 bnx_npoll_rx(struct ifnet *ifp __unused, void *xret, int cycle)
2676 {
2677 	struct bnx_rx_ret_ring *ret = xret;
2678 	uint16_t rx_prod;
2679 
2680 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
2681 
2682 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
2683 	cpu_lfence();
2684 
2685 	rx_prod = *ret->bnx_rx_considx;
2686 	if (ret->bnx_rx_saved_considx != rx_prod)
2687 		bnx_rxeof(ret, rx_prod, cycle);
2688 }
2689 
2690 static void
2691 bnx_npoll_tx(struct ifnet *ifp __unused, void *xtxr, int cycle __unused)
2692 {
2693 	struct bnx_tx_ring *txr = xtxr;
2694 	uint16_t tx_cons;
2695 
2696 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
2697 
2698 	tx_cons = *txr->bnx_tx_considx;
2699 	if (txr->bnx_tx_saved_considx != tx_cons)
2700 		bnx_txeof(txr, tx_cons);
2701 }
2702 
2703 static void
2704 bnx_npoll_status(struct ifnet *ifp)
2705 {
2706 	struct bnx_softc *sc = ifp->if_softc;
2707 	struct bge_status_block *sblk = sc->bnx_intr_data[0].bnx_status_block;
2708 
2709 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
2710 
2711 	if ((sblk->bge_status & BGE_STATFLAG_LINKSTATE_CHANGED) ||
2712 	    sc->bnx_link_evt)
2713 		bnx_link_poll(sc);
2714 }
2715 
2716 static void
2717 bnx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
2718 {
2719 	struct bnx_softc *sc = ifp->if_softc;
2720 	int i;
2721 
2722 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2723 
2724 	if (info != NULL) {
2725 		/*
2726 		 * TODO handle RXTX bundle and non-bundle
2727 		 */
2728 		info->ifpi_status.status_func = bnx_npoll_status;
2729 		info->ifpi_status.serializer = &sc->bnx_main_serialize;
2730 
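		/*
		 * Hook each TX ring up to polling CPU (i + npoll_txoff) and
		 * move its subqueue to that CPU so queueing and polling
		 * stay together; RX return rings are mapped the same way
		 * with npoll_rxoff below.
		 */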
2731 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2732 			struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
2733 			int idx = i + sc->bnx_npoll_txoff;
2734 
2735 			KKASSERT(idx < ncpus2);
2736 			info->ifpi_tx[idx].poll_func = bnx_npoll_tx;
2737 			info->ifpi_tx[idx].arg = txr;
2738 			info->ifpi_tx[idx].serializer = &txr->bnx_tx_serialize;
2739 			ifsq_set_cpuid(txr->bnx_ifsq, idx);
2740 		}
2741 
2742 		for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
2743 			struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
2744 			int idx = i + sc->bnx_npoll_rxoff;
2745 
2746 			KKASSERT(idx < ncpus2);
2747 			info->ifpi_rx[idx].poll_func = bnx_npoll_rx;
2748 			info->ifpi_rx[idx].arg = ret;
2749 			info->ifpi_rx[idx].serializer =
2750 			    &ret->bnx_rx_ret_serialize;
2751 		}
2752 
2753 		if (ifp->if_flags & IFF_RUNNING) {
2754 			bnx_disable_intr(sc);
2755 			bnx_set_tick_cpuid(sc, TRUE);
2756 		}
2757 	} else {
2758 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2759 			ifsq_set_cpuid(sc->bnx_tx_ring[i].bnx_ifsq,
2760 			    sc->bnx_tx_ring[i].bnx_tx_cpuid);
2761 		}
2762 		if (ifp->if_flags & IFF_RUNNING) {
2763 			bnx_enable_intr(sc);
2764 			bnx_set_tick_cpuid(sc, FALSE);
2765 		}
2766 	}
2767 }
2768 
2769 #endif	/* IFPOLL_ENABLE */
2770 
2771 static void
2772 bnx_intr_legacy(void *xsc)
2773 {
2774 	struct bnx_softc *sc = xsc;
2775 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
2776 
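	/*
	 * If the status tag has not changed since the interrupt was last
	 * acknowledged, peek at the PCI state register; if the interrupt
	 * line is not active, this interrupt is not ours (shared line or
	 * spurious) and we return without touching the chip.
	 */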
2777 	if (ret->bnx_saved_status_tag == *ret->bnx_hw_status_tag) {
2778 		uint32_t val;
2779 
2780 		val = pci_read_config(sc->bnx_dev, BGE_PCI_PCISTATE, 4);
2781 		if (val & BGE_PCISTAT_INTR_NOTACT)
2782 			return;
2783 	}
2784 
2785 	/*
2786 	 * NOTE:
2787 	 * The interrupt will have to be disabled if tagged status
2788 	 * is used, else the interrupt will always be asserted on
2789 	 * certain chips (at least on BCM5750 AX/BX).
2790 	 */
2791 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, 1);
2792 
2793 	bnx_intr(sc);
2794 }
2795 
2796 static void
2797 bnx_msi(void *xsc)
2798 {
2799 	bnx_intr(xsc);
2800 }
2801 
2802 static void
2803 bnx_intr(struct bnx_softc *sc)
2804 {
2805 	struct ifnet *ifp = &sc->arpcom.ac_if;
2806 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
2807 	struct bge_status_block *sblk = sc->bnx_intr_data[0].bnx_status_block;
2808 	uint32_t status;
2809 
2810 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
2811 
2812 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
2813 	/*
2814 	 * Use a load fence to ensure that status_tag is saved
2815 	 * before rx_prod, tx_cons and status.
2816 	 */
2817 	cpu_lfence();
2818 
2819 	status = sblk->bge_status;
2820 
2821 	if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) || sc->bnx_link_evt)
2822 		bnx_link_poll(sc);
2823 
2824 	if (ifp->if_flags & IFF_RUNNING) {
2825 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
2826 		uint16_t rx_prod, tx_cons;
2827 
2828 		lwkt_serialize_enter(&ret->bnx_rx_ret_serialize);
2829 		rx_prod = *ret->bnx_rx_considx;
2830 		if (ret->bnx_rx_saved_considx != rx_prod)
2831 			bnx_rxeof(ret, rx_prod, -1);
2832 		lwkt_serialize_exit(&ret->bnx_rx_ret_serialize);
2833 
2834 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
2835 		tx_cons = *txr->bnx_tx_considx;
2836 		if (txr->bnx_tx_saved_considx != tx_cons)
2837 			bnx_txeof(txr, tx_cons);
2838 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
2839 	}
2840 
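	/*
	 * Write the saved status tag (shifted into the upper byte) back
	 * to the IRQ0 mailbox; under the tagged status scheme this
	 * acknowledges everything processed up to that tag and re-arms
	 * the interrupt.
	 */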
2841 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, ret->bnx_saved_status_tag << 24);
2842 }
2843 
2844 static void
2845 bnx_tick(void *xsc)
2846 {
2847 	struct bnx_softc *sc = xsc;
2848 
2849 	lwkt_serialize_enter(&sc->bnx_main_serialize);
2850 
2851 	bnx_stats_update_regs(sc);
2852 
2853 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2854 		/*
2855 		 * Since auto-polling can't be used in TBI mode, we poll the
2856 		 * link status manually.  Register a pending link event and
2857 		 * trigger an interrupt.
2858 		 */
2859 		sc->bnx_link_evt++;
2860 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
2861 	} else if (!sc->bnx_link) {
2862 		mii_tick(device_get_softc(sc->bnx_miibus));
2863 	}
2864 
2865 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
2866 	    sc->bnx_tick_cpuid);
2867 
2868 	lwkt_serialize_exit(&sc->bnx_main_serialize);
2869 }
2870 
2871 static void
2872 bnx_stats_update_regs(struct bnx_softc *sc)
2873 {
2874 	struct ifnet *ifp = &sc->arpcom.ac_if;
2875 	struct bge_mac_stats_regs stats;
2876 	uint32_t *s;
2877 	int i;
2878 
2879 	s = (uint32_t *)&stats;
2880 	for (i = 0; i < sizeof(struct bge_mac_stats_regs); i += 4) {
2881 		*s = CSR_READ_4(sc, BGE_RX_STATS + i);
2882 		s++;
2883 	}
2884 
2885 	IFNET_STAT_SET(ifp, collisions,
2886 	   (stats.dot3StatsSingleCollisionFrames +
2887 	   stats.dot3StatsMultipleCollisionFrames +
2888 	   stats.dot3StatsExcessiveCollisions +
2889 	   stats.dot3StatsLateCollisions));
2890 }
2891 
2892 /*
2893  * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data
2894  * pointers to descriptors.
2895  */
2896 static int
2897 bnx_encap(struct bnx_tx_ring *txr, struct mbuf **m_head0, uint32_t *txidx,
2898     int *segs_used)
2899 {
2900 	struct bge_tx_bd *d = NULL;
2901 	uint16_t csum_flags = 0, vlan_tag = 0, mss = 0;
2902 	bus_dma_segment_t segs[BNX_NSEG_NEW];
2903 	bus_dmamap_t map;
2904 	int error, maxsegs, nsegs, idx, i;
2905 	struct mbuf *m_head = *m_head0, *m_new;
2906 
2907 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2908 #ifdef BNX_TSO_DEBUG
2909 		int tso_nsegs;
2910 #endif
2911 
2912 		error = bnx_setup_tso(txr, m_head0, &mss, &csum_flags);
2913 		if (error)
2914 			return error;
2915 		m_head = *m_head0;
2916 
2917 #ifdef BNX_TSO_DEBUG
2918 		tso_nsegs = (m_head->m_pkthdr.len /
2919 		    m_head->m_pkthdr.tso_segsz) - 1;
2920 		if (tso_nsegs > (BNX_TSO_NSTATS - 1))
2921 			tso_nsegs = BNX_TSO_NSTATS - 1;
2922 		else if (tso_nsegs < 0)
2923 			tso_nsegs = 0;
2924 		txr->bnx_sc->bnx_tsosegs[tso_nsegs]++;
2925 #endif
2926 	} else if (m_head->m_pkthdr.csum_flags & BNX_CSUM_FEATURES) {
2927 		if (m_head->m_pkthdr.csum_flags & CSUM_IP)
2928 			csum_flags |= BGE_TXBDFLAG_IP_CSUM;
2929 		if (m_head->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2930 			csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM;
2931 		if (m_head->m_flags & M_LASTFRAG)
2932 			csum_flags |= BGE_TXBDFLAG_IP_FRAG_END;
2933 		else if (m_head->m_flags & M_FRAG)
2934 			csum_flags |= BGE_TXBDFLAG_IP_FRAG;
2935 	}
2936 	if (m_head->m_flags & M_VLANTAG) {
2937 		csum_flags |= BGE_TXBDFLAG_VLAN_TAG;
2938 		vlan_tag = m_head->m_pkthdr.ether_vlantag;
2939 	}
2940 
2941 	idx = *txidx;
2942 	map = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
2943 
2944 	maxsegs = (BGE_TX_RING_CNT - txr->bnx_tx_cnt) - BNX_NSEG_RSVD;
2945 	KASSERT(maxsegs >= BNX_NSEG_SPARE,
2946 		("not enough segments %d", maxsegs));
2947 
2948 	if (maxsegs > BNX_NSEG_NEW)
2949 		maxsegs = BNX_NSEG_NEW;
2950 
2951 	/*
2952 	 * Pad outbound frame to BNX_MIN_FRAMELEN for an unusual reason.
2953 	 * The bge hardware will pad out Tx runts to BNX_MIN_FRAMELEN,
2954 	 * but when such padded frames employ the bge IP/TCP checksum
2955 	 * offload, the hardware checksum assist gives incorrect results
2956 	 * (possibly from incorporating its own padding into the UDP/TCP
2957 	 * checksum; who knows).  If we pad such runts with zeros, the
2958 	 * onboard checksum comes out correct.
2959 	 */
2960 	if ((csum_flags & BGE_TXBDFLAG_TCP_UDP_CSUM) &&
2961 	    m_head->m_pkthdr.len < BNX_MIN_FRAMELEN) {
2962 		error = m_devpad(m_head, BNX_MIN_FRAMELEN);
2963 		if (error)
2964 			goto back;
2965 	}
2966 
2967 	if ((txr->bnx_tx_flags & BNX_TX_FLAG_SHORTDMA) &&
2968 	    m_head->m_next != NULL) {
2969 		m_new = bnx_defrag_shortdma(m_head);
2970 		if (m_new == NULL) {
2971 			error = ENOBUFS;
2972 			goto back;
2973 		}
2974 		*m_head0 = m_head = m_new;
2975 	}
2976 	if ((m_head->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
2977 	    (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG) &&
2978 	    m_head->m_next != NULL) {
2979 		/*
2980 		 * Forcefully defragment the mbuf chain to overcome the
2981 		 * hardware limitation of a single outstanding DMA read
2982 		 * operation.  If it fails, keep moving on using
2983 		 * the original mbuf chain.
2984 		 */
2985 		m_new = m_defrag(m_head, MB_DONTWAIT);
2986 		if (m_new != NULL)
2987 			*m_head0 = m_head = m_new;
2988 	}
2989 
2990 	error = bus_dmamap_load_mbuf_defrag(txr->bnx_tx_mtag, map,
2991 	    m_head0, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
2992 	if (error)
2993 		goto back;
2994 	*segs_used += nsegs;
2995 
2996 	m_head = *m_head0;
2997 	bus_dmamap_sync(txr->bnx_tx_mtag, map, BUS_DMASYNC_PREWRITE);
2998 
2999 	for (i = 0; ; i++) {
3000 		d = &txr->bnx_tx_ring[idx];
3001 
3002 		d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr);
3003 		d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr);
3004 		d->bge_len = segs[i].ds_len;
3005 		d->bge_flags = csum_flags;
3006 		d->bge_vlan_tag = vlan_tag;
3007 		d->bge_mss = mss;
3008 
3009 		if (i == nsegs - 1)
3010 			break;
3011 		BNX_INC(idx, BGE_TX_RING_CNT);
3012 	}
3013 	/* Mark the last segment as end of packet... */
3014 	d->bge_flags |= BGE_TXBDFLAG_END;
3015 
3016 	/*
3017 	 * Ensure that the map for this transmission is placed at
3018 	 * the array index of the last descriptor in this chain.
3019 	 */
3020 	txr->bnx_tx_buf[*txidx].bnx_tx_dmamap = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3021 	txr->bnx_tx_buf[idx].bnx_tx_dmamap = map;
3022 	txr->bnx_tx_buf[idx].bnx_tx_mbuf = m_head;
3023 	txr->bnx_tx_cnt += nsegs;
3024 
3025 	BNX_INC(idx, BGE_TX_RING_CNT);
3026 	*txidx = idx;
3027 back:
3028 	if (error) {
3029 		m_freem(*m_head0);
3030 		*m_head0 = NULL;
3031 	}
3032 	return error;
3033 }
3034 
3035 /*
3036  * Main transmit routine. To avoid having to do mbuf copies, we put pointers
3037  * to the mbuf data regions directly in the transmit descriptors.
3038  */
3039 static void
3040 bnx_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3041 {
3042 	struct bnx_tx_ring *txr = ifsq_get_priv(ifsq);
3043 	struct mbuf *m_head = NULL;
3044 	uint32_t prodidx;
3045 	int nsegs = 0;
3046 
3047 	KKASSERT(txr->bnx_ifsq == ifsq);
3048 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3049 
3050 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
3051 		return;
3052 
3053 	prodidx = txr->bnx_tx_prodidx;
3054 
3055 	while (txr->bnx_tx_buf[prodidx].bnx_tx_mbuf == NULL) {
3056 		/*
3057 		 * Sanity check: avoid coming within BNX_NSEG_RSVD
3058 		 * descriptors of the end of the ring.  Also make
3059 		 * sure there are BNX_NSEG_SPARE descriptors for
3060 		 * jumbo buffers' or TSO segments' defragmentation.
3061 		 */
3062 		if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) <
3063 		    (BNX_NSEG_RSVD + BNX_NSEG_SPARE)) {
3064 			ifsq_set_oactive(ifsq);
3065 			break;
3066 		}
3067 
3068 		m_head = ifsq_dequeue(ifsq, NULL);
3069 		if (m_head == NULL)
3070 			break;
3071 
3072 		/*
3073 		 * Pack the data into the transmit ring. If we
3074 		 * don't have room, set the OACTIVE flag and wait
3075 		 * for the NIC to drain the ring.
3076 		 */
3077 		if (bnx_encap(txr, &m_head, &prodidx, &nsegs)) {
3078 			ifsq_set_oactive(ifsq);
3079 			IFNET_STAT_INC(ifp, oerrors, 1);
3080 			break;
3081 		}
3082 
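		/*
		 * Only ring the TX doorbell after at least bnx_tx_wreg
		 * segments have been queued (see the tx_wreg sysctl) to
		 * reduce register write overhead; any remainder is
		 * flushed after the loop.
		 */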
3083 		if (nsegs >= txr->bnx_tx_wreg) {
3084 			/* Transmit */
3085 			bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3086 			nsegs = 0;
3087 		}
3088 
3089 		ETHER_BPF_MTAP(ifp, m_head);
3090 
3091 		/*
3092 		 * Set a timeout in case the chip goes out to lunch.
3093 		 */
3094 		txr->bnx_tx_watchdog.wd_timer = 5;
3095 	}
3096 
3097 	if (nsegs > 0) {
3098 		/* Transmit */
3099 		bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3100 	}
3101 	txr->bnx_tx_prodidx = prodidx;
3102 }
3103 
3104 static void
3105 bnx_init(void *xsc)
3106 {
3107 	struct bnx_softc *sc = xsc;
3108 	struct ifnet *ifp = &sc->arpcom.ac_if;
3109 	uint16_t *m;
3110 	uint32_t mode;
3111 	int i;
3112 	boolean_t polling;
3113 
3114 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3115 
3116 	/* Cancel pending I/O and flush buffers. */
3117 	bnx_stop(sc);
3118 	bnx_reset(sc);
3119 	bnx_chipinit(sc);
3120 
3121 	/*
3122 	 * Init the various state machines, ring
3123 	 * control blocks and firmware.
3124 	 */
3125 	if (bnx_blockinit(sc)) {
3126 		if_printf(ifp, "initialization failure\n");
3127 		bnx_stop(sc);
3128 		return;
3129 	}
3130 
3131 	/* Specify MTU. */
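	/* The limit includes the Ethernet header, CRC and a VLAN tag. */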
3132 	CSR_WRITE_4(sc, BGE_RX_MTU, ifp->if_mtu +
3133 	    ETHER_HDR_LEN + ETHER_CRC_LEN + EVL_ENCAPLEN);
3134 
3135 	/* Load our MAC address. */
3136 	m = (uint16_t *)&sc->arpcom.ac_enaddr[0];
3137 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0]));
3138 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2]));
3139 
3140 	/* Enable or disable promiscuous mode as needed. */
3141 	bnx_setpromisc(sc);
3142 
3143 	/* Program multicast filter. */
3144 	bnx_setmulti(sc);
3145 
3146 	/* Init RX ring. */
3147 	if (bnx_init_rx_ring_std(&sc->bnx_rx_std_ring)) {
3148 		if_printf(ifp, "RX ring initialization failed\n");
3149 		bnx_stop(sc);
3150 		return;
3151 	}
3152 
3153 	/* Init jumbo RX ring. */
3154 	if (ifp->if_mtu > (ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN)) {
3155 		if (bnx_init_rx_ring_jumbo(sc)) {
3156 			if_printf(ifp, "Jumbo RX ring initialization failed\n");
3157 			bnx_stop(sc);
3158 			return;
3159 		}
3160 	}
3161 
3162 	/* Init our RX return ring index */
3163 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3164 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3165 
3166 		ret->bnx_rx_saved_considx = 0;
3167 		ret->bnx_rx_cnt = 0;
3168 	}
3169 
3170 	/* Init TX ring. */
3171 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3172 		bnx_init_tx_ring(&sc->bnx_tx_ring[i]);
3173 
3174 	/* Enable TX MAC state machine lockup fix. */
3175 	mode = CSR_READ_4(sc, BGE_TX_MODE);
3176 	mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
3177 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
3178 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
3179 		mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3180 		mode |= CSR_READ_4(sc, BGE_TX_MODE) &
3181 		    (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3182 	}
3183 	/* Turn on transmitter */
3184 	CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
3185 
3186 	/* Turn on receiver */
3187 	BNX_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
3188 
3189 	/*
3190 	 * Set the number of good frames to receive after RX MBUF
3191 	 * Low Watermark has been reached.  After the RX MAC receives
3192 	 * this number of frames, it will drop subsequent incoming
3193 	 * frames until the MBUF High Watermark is reached.
3194 	 */
3195 	if (BNX_IS_57765_FAMILY(sc))
3196 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1);
3197 	else
3198 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
3199 
3200 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI) {
3201 		if (bootverbose) {
3202 			if_printf(ifp, "MSI_MODE: %#x\n",
3203 			    CSR_READ_4(sc, BGE_MSI_MODE));
3204 		}
3205 	}
3206 
3207 	/* Tell firmware we're alive. */
3208 	BNX_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3209 
3210 	/* Enable host interrupts if polling(4) is not enabled. */
3211 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA, 4);
3212 
3213 	polling = FALSE;
3214 #ifdef IFPOLL_ENABLE
3215 	if (ifp->if_flags & IFF_NPOLLING)
3216 		polling = TRUE;
3217 #endif
3218 	if (polling)
3219 		bnx_disable_intr(sc);
3220 	else
3221 		bnx_enable_intr(sc);
3222 	bnx_set_tick_cpuid(sc, polling);
3223 
3224 	bnx_ifmedia_upd(ifp);
3225 
3226 	ifp->if_flags |= IFF_RUNNING;
3227 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3228 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3229 
3230 		ifsq_clr_oactive(txr->bnx_ifsq);
3231 		ifsq_watchdog_start(&txr->bnx_tx_watchdog);
3232 	}
3233 
3234 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3235 	    sc->bnx_tick_cpuid);
3236 }
3237 
3238 /*
3239  * Set media options.
3240  */
3241 static int
3242 bnx_ifmedia_upd(struct ifnet *ifp)
3243 {
3244 	struct bnx_softc *sc = ifp->if_softc;
3245 
3246 	/* If this is a 1000baseX NIC, enable the TBI port. */
3247 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3248 		struct ifmedia *ifm = &sc->bnx_ifmedia;
3249 
3250 		if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3251 			return(EINVAL);
3252 
3253 		switch(IFM_SUBTYPE(ifm->ifm_media)) {
3254 		case IFM_AUTO:
3255 			break;
3256 
3257 		case IFM_1000_SX:
3258 			if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
3259 				BNX_CLRBIT(sc, BGE_MAC_MODE,
3260 				    BGE_MACMODE_HALF_DUPLEX);
3261 			} else {
3262 				BNX_SETBIT(sc, BGE_MAC_MODE,
3263 				    BGE_MACMODE_HALF_DUPLEX);
3264 			}
3265 			break;
3266 		default:
3267 			return(EINVAL);
3268 		}
3269 	} else {
3270 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3271 
3272 		sc->bnx_link_evt++;
3273 		sc->bnx_link = 0;
3274 		if (mii->mii_instance) {
3275 			struct mii_softc *miisc;
3276 
3277 			LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
3278 				mii_phy_reset(miisc);
3279 		}
3280 		mii_mediachg(mii);
3281 
3282 		/*
3283 		 * Force an interrupt so that we will call bnx_link_upd
3284 		 * if needed and clear any pending link state attention.
3285 		 * Without this we would not get any further interrupts
3286 		 * for link state changes, would never bring the link UP and
3287 		 * would not be able to send in bnx_start.  The only way to
3288 		 * get things working was to receive a packet and get an RX
3289 		 * intr.
3290 		 *
3291 		 * bnx_tick should help for fiber cards, so we might not
3292 		 * need to do this here if BNX_FLAG_TBI is set, but since
3293 		 * we poll for fiber anyway it does no harm.
3294 		 */
3295 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3296 	}
3297 	return(0);
3298 }
3299 
3300 /*
3301  * Report current media status.
3302  */
3303 static void
3304 bnx_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3305 {
3306 	struct bnx_softc *sc = ifp->if_softc;
3307 
3308 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3309 		ifmr->ifm_status = IFM_AVALID;
3310 		ifmr->ifm_active = IFM_ETHER;
3311 		if (CSR_READ_4(sc, BGE_MAC_STS) &
3312 		    BGE_MACSTAT_TBI_PCS_SYNCHED) {
3313 			ifmr->ifm_status |= IFM_ACTIVE;
3314 		} else {
3315 			ifmr->ifm_active |= IFM_NONE;
3316 			return;
3317 		}
3318 
3319 		ifmr->ifm_active |= IFM_1000_SX;
3320 		if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX)
3321 			ifmr->ifm_active |= IFM_HDX;
3322 		else
3323 			ifmr->ifm_active |= IFM_FDX;
3324 	} else {
3325 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3326 
3327 		mii_pollstat(mii);
3328 		ifmr->ifm_active = mii->mii_media_active;
3329 		ifmr->ifm_status = mii->mii_media_status;
3330 	}
3331 }
3332 
3333 static int
3334 bnx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3335 {
3336 	struct bnx_softc *sc = ifp->if_softc;
3337 	struct ifreq *ifr = (struct ifreq *)data;
3338 	int mask, error = 0;
3339 
3340 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3341 
3342 	switch (command) {
3343 	case SIOCSIFMTU:
3344 		if ((!BNX_IS_JUMBO_CAPABLE(sc) && ifr->ifr_mtu > ETHERMTU) ||
3345 		    (BNX_IS_JUMBO_CAPABLE(sc) &&
3346 		     ifr->ifr_mtu > BNX_JUMBO_MTU)) {
3347 			error = EINVAL;
3348 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
3349 			ifp->if_mtu = ifr->ifr_mtu;
3350 			if (ifp->if_flags & IFF_RUNNING)
3351 				bnx_init(sc);
3352 		}
3353 		break;
3354 	case SIOCSIFFLAGS:
3355 		if (ifp->if_flags & IFF_UP) {
3356 			if (ifp->if_flags & IFF_RUNNING) {
3357 				mask = ifp->if_flags ^ sc->bnx_if_flags;
3358 
3359 				/*
3360 				 * If only the state of the PROMISC flag
3361 				 * changed, then just use the 'set promisc
3362 				 * mode' command instead of reinitializing
3363 				 * the entire NIC. Doing a full re-init
3364 				 * means reloading the firmware and waiting
3365 				 * for it to start up, which may take a
3366 				 * second or two.  Similarly for ALLMULTI.
3367 				 */
3368 				if (mask & IFF_PROMISC)
3369 					bnx_setpromisc(sc);
3370 				if (mask & IFF_ALLMULTI)
3371 					bnx_setmulti(sc);
3372 			} else {
3373 				bnx_init(sc);
3374 			}
3375 		} else if (ifp->if_flags & IFF_RUNNING) {
3376 			bnx_stop(sc);
3377 		}
3378 		sc->bnx_if_flags = ifp->if_flags;
3379 		break;
3380 	case SIOCADDMULTI:
3381 	case SIOCDELMULTI:
3382 		if (ifp->if_flags & IFF_RUNNING)
3383 			bnx_setmulti(sc);
3384 		break;
3385 	case SIOCSIFMEDIA:
3386 	case SIOCGIFMEDIA:
3387 		if (sc->bnx_flags & BNX_FLAG_TBI) {
3388 			error = ifmedia_ioctl(ifp, ifr,
3389 			    &sc->bnx_ifmedia, command);
3390 		} else {
3391 			struct mii_data *mii;
3392 
3393 			mii = device_get_softc(sc->bnx_miibus);
3394 			error = ifmedia_ioctl(ifp, ifr,
3395 					      &mii->mii_media, command);
3396 		}
3397 		break;
3398 	case SIOCSIFCAP:
3399 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3400 		if (mask & IFCAP_HWCSUM) {
3401 			ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
3402 			if (ifp->if_capenable & IFCAP_TXCSUM)
3403 				ifp->if_hwassist |= BNX_CSUM_FEATURES;
3404 			else
3405 				ifp->if_hwassist &= ~BNX_CSUM_FEATURES;
3406 		}
3407 		if (mask & IFCAP_TSO) {
3408 			ifp->if_capenable ^= (mask & IFCAP_TSO);
3409 			if (ifp->if_capenable & IFCAP_TSO)
3410 				ifp->if_hwassist |= CSUM_TSO;
3411 			else
3412 				ifp->if_hwassist &= ~CSUM_TSO;
3413 		}
3414 		break;
3415 	default:
3416 		error = ether_ioctl(ifp, command, data);
3417 		break;
3418 	}
3419 	return error;
3420 }
3421 
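/*
 * Transmit watchdog: the TX path made no progress within the timeout,
 * so reinitialize the chip and kick the send queues again.
 */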
3422 static void
3423 bnx_watchdog(struct ifaltq_subque *ifsq)
3424 {
3425 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3426 	struct bnx_softc *sc = ifp->if_softc;
3427 	int i;
3428 
3429 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3430 
3431 	if_printf(ifp, "watchdog timeout -- resetting\n");
3432 
3433 	bnx_init(sc);
3434 
3435 	IFNET_STAT_INC(ifp, oerrors, 1);
3436 
3437 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3438 		ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
3439 }
3440 
3441 /*
3442  * Stop the adapter and free any mbufs allocated to the
3443  * RX and TX lists.
3444  */
3445 static void
3446 bnx_stop(struct bnx_softc *sc)
3447 {
3448 	struct ifnet *ifp = &sc->arpcom.ac_if;
3449 	int i;
3450 
3451 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3452 
3453 	callout_stop(&sc->bnx_tick_timer);
3454 
3455 	/*
3456 	 * Disable all of the receiver blocks
3457 	 */
3458 	bnx_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
3459 	bnx_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
3460 	bnx_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
3461 	bnx_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
3462 	bnx_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
3463 	bnx_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
3464 
3465 	/*
3466 	 * Disable all of the transmit blocks
3467 	 */
3468 	bnx_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
3469 	bnx_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
3470 	bnx_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
3471 	bnx_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
3472 	bnx_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
3473 	bnx_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
3474 
3475 	/*
3476 	 * Shut down all of the memory managers and related
3477 	 * state machines.
3478 	 */
3479 	bnx_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
3480 	bnx_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
3481 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
3482 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
3483 
3484 	/* Disable host interrupts. */
3485 	bnx_disable_intr(sc);
3486 
3487 	/*
3488 	 * Tell firmware we're shutting down.
3489 	 */
3490 	BNX_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3491 
3492 	/* Free the RX lists. */
3493 	bnx_free_rx_ring_std(&sc->bnx_rx_std_ring);
3494 
3495 	/* Free jumbo RX list. */
3496 	if (BNX_IS_JUMBO_CAPABLE(sc))
3497 		bnx_free_rx_ring_jumbo(sc);
3498 
3499 	/* Free TX buffers. */
3500 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3501 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3502 
3503 		txr->bnx_saved_status_tag = 0;
3504 		bnx_free_tx_ring(txr);
3505 	}
3506 
3507 	/* Clear saved status tag */
3508 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
3509 		sc->bnx_rx_ret_ring[i].bnx_saved_status_tag = 0;
3510 
3511 	sc->bnx_link = 0;
3512 	sc->bnx_coal_chg = 0;
3513 
3514 	ifp->if_flags &= ~IFF_RUNNING;
3515 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3516 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3517 
3518 		ifsq_clr_oactive(txr->bnx_ifsq);
3519 		ifsq_watchdog_stop(&txr->bnx_tx_watchdog);
3520 	}
3521 }
3522 
3523 /*
3524  * Stop all chip I/O so that the kernel's probe routines don't
3525  * get confused by errant DMAs when rebooting.
3526  */
3527 static void
3528 bnx_shutdown(device_t dev)
3529 {
3530 	struct bnx_softc *sc = device_get_softc(dev);
3531 	struct ifnet *ifp = &sc->arpcom.ac_if;
3532 
3533 	ifnet_serialize_all(ifp);
3534 	bnx_stop(sc);
3535 	bnx_reset(sc);
3536 	ifnet_deserialize_all(ifp);
3537 }
3538 
3539 static int
3540 bnx_suspend(device_t dev)
3541 {
3542 	struct bnx_softc *sc = device_get_softc(dev);
3543 	struct ifnet *ifp = &sc->arpcom.ac_if;
3544 
3545 	ifnet_serialize_all(ifp);
3546 	bnx_stop(sc);
3547 	ifnet_deserialize_all(ifp);
3548 
3549 	return 0;
3550 }
3551 
3552 static int
3553 bnx_resume(device_t dev)
3554 {
3555 	struct bnx_softc *sc = device_get_softc(dev);
3556 	struct ifnet *ifp = &sc->arpcom.ac_if;
3557 
3558 	ifnet_serialize_all(ifp);
3559 
3560 	if (ifp->if_flags & IFF_UP) {
3561 		int i;
3562 
3563 		bnx_init(sc);
3564 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3565 			ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
3566 	}
3567 
3568 	ifnet_deserialize_all(ifp);
3569 
3570 	return 0;
3571 }
3572 
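/*
 * Program the receive MAC according to the interface IFF_PROMISC flag.
 */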
3573 static void
3574 bnx_setpromisc(struct bnx_softc *sc)
3575 {
3576 	struct ifnet *ifp = &sc->arpcom.ac_if;
3577 
3578 	if (ifp->if_flags & IFF_PROMISC)
3579 		BNX_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
3580 	else
3581 		BNX_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
3582 }
3583 
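/*
 * Release all DMA resources allocated by bnx_dma_alloc().
 */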
3584 static void
3585 bnx_dma_free(struct bnx_softc *sc)
3586 {
3587 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
3588 	int i;
3589 
3590 	/* Destroy RX return rings */
3591 	if (sc->bnx_rx_ret_ring != NULL) {
3592 		for (i = 0; i < sc->bnx_rx_retcnt; ++i)
3593 			bnx_destroy_rx_ret_ring(&sc->bnx_rx_ret_ring[i]);
3594 		kfree(sc->bnx_rx_ret_ring, M_DEVBUF);
3595 	}
3596 
3597 	/* Destroy RX mbuf DMA resources. */
3598 	if (std->bnx_rx_mtag != NULL) {
3599 		for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
3600 			KKASSERT(std->bnx_rx_std_buf[i].bnx_rx_mbuf == NULL);
3601 			bus_dmamap_destroy(std->bnx_rx_mtag,
3602 			    std->bnx_rx_std_buf[i].bnx_rx_dmamap);
3603 		}
3604 		bus_dma_tag_destroy(std->bnx_rx_mtag);
3605 	}
3606 
3607 	/* Destroy standard RX ring */
3608 	bnx_dma_block_free(std->bnx_rx_std_ring_tag,
3609 	    std->bnx_rx_std_ring_map, std->bnx_rx_std_ring);
3610 
3611 	/* Destroy TX rings */
3612 	if (sc->bnx_tx_ring != NULL) {
3613 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3614 			bnx_destroy_tx_ring(&sc->bnx_tx_ring[i]);
3615 		kfree(sc->bnx_tx_ring, M_DEVBUF);
3616 	}
3617 
3618 	if (BNX_IS_JUMBO_CAPABLE(sc))
3619 		bnx_free_jumbo_mem(sc);
3620 
3621 	/* Destroy status blocks */
3622 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
3623 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
3624 
3625 		bnx_dma_block_free(intr->bnx_status_tag,
3626 		    intr->bnx_status_map, intr->bnx_status_block);
3627 	}
3628 
3629 	/* Destroy the parent tag */
3630 	if (sc->bnx_cdata.bnx_parent_tag != NULL)
3631 		bus_dma_tag_destroy(sc->bnx_cdata.bnx_parent_tag);
3632 }
3633 
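/*
 * Allocate the parent DMA tag and all per-ring DMA resources:
 * status blocks, RX mbuf maps, the standard RX ring, the RX return
 * rings, the TX rings and, on jumbo capable chips, the jumbo
 * buffer pool.
 */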
3634 static int
3635 bnx_dma_alloc(device_t dev)
3636 {
3637 	struct bnx_softc *sc = device_get_softc(dev);
3638 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
3639 	int i, error, mbx;
3640 
3641 	/*
3642 	 * Allocate the parent bus DMA tag appropriate for PCI.
3643 	 *
3644 	 * All of the NetExtreme/NetLink controllers have the 4GB
3645 	 * boundary DMA bug: whenever an address crosses a multiple of
3646 	 * the 4GB boundary (4GB, 8GB, 12GB, etc.) and makes the
3647 	 * transition from 0xX_FFFF_FFFF to 0x(X+1)_0000_0000, an
3648 	 * internal DMA state machine will lock up and cause the
3649 	 * device to hang.
3650 	 */
3651 	error = bus_dma_tag_create(NULL, 1, BGE_DMA_BOUNDARY_4G,
3652 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
3653 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
3654 	    0, &sc->bnx_cdata.bnx_parent_tag);
3655 	if (error) {
3656 		device_printf(dev, "could not create parent DMA tag\n");
3657 		return error;
3658 	}
3659 
3660 	/*
3661 	 * Create DMA resources for status blocks.
3662 	 */
3663 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
3664 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
3665 
3666 		error = bnx_dma_block_alloc(sc, BGE_STATUS_BLK_SZ,
3667 		    &intr->bnx_status_tag, &intr->bnx_status_map,
3668 		    (void *)&intr->bnx_status_block,
3669 		    &intr->bnx_status_block_paddr);
3670 		if (error) {
3671 			device_printf(dev,
3672 			    "could not create %dth status block\n", i);
3673 			return error;
3674 		}
3675 	}
3676 
3677 	/*
3678 	 * Create DMA tag and maps for RX mbufs.
3679 	 */
3680 	std->bnx_sc = sc;
3681 	lwkt_serialize_init(&std->bnx_rx_std_serialize);
3682 	error = bus_dma_tag_create(sc->bnx_cdata.bnx_parent_tag, 1, 0,
3683 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3684 	    NULL, NULL, MCLBYTES, 1, MCLBYTES,
3685 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK, &std->bnx_rx_mtag);
3686 	if (error) {
3687 		device_printf(dev, "could not create RX mbuf DMA tag\n");
3688 		return error;
3689 	}
3690 
3691 	for (i = 0; i < BGE_STD_RX_RING_CNT; ++i) {
3692 		error = bus_dmamap_create(std->bnx_rx_mtag, BUS_DMA_WAITOK,
3693 		    &std->bnx_rx_std_buf[i].bnx_rx_dmamap);
3694 		if (error) {
3695 			int j;
3696 
3697 			for (j = 0; j < i; ++j) {
3698 				bus_dmamap_destroy(std->bnx_rx_mtag,
3699 				    std->bnx_rx_std_buf[j].bnx_rx_dmamap);
3700 			}
3701 			bus_dma_tag_destroy(std->bnx_rx_mtag);
3702 			std->bnx_rx_mtag = NULL;
3703 
3704 			device_printf(dev,
3705 			    "could not create %dth RX mbuf DMA map\n", i);
3706 			return error;
3707 		}
3708 	}
3709 
3710 	/*
3711 	 * Create DMA resources for the standard RX ring.
3712 	 */
3713 	error = bnx_dma_block_alloc(sc, BGE_STD_RX_RING_SZ,
3714 	    &std->bnx_rx_std_ring_tag,
3715 	    &std->bnx_rx_std_ring_map,
3716 	    (void *)&std->bnx_rx_std_ring,
3717 	    &std->bnx_rx_std_ring_paddr);
3718 	if (error) {
3719 		device_printf(dev, "could not create std RX ring\n");
3720 		return error;
3721 	}
3722 
3723 	/*
3724 	 * Create RX return rings
3725 	 */
3726 	mbx = BGE_MBX_RX_CONS0_LO;
3727 	sc->bnx_rx_ret_ring = kmalloc_cachealign(
3728 	    sizeof(struct bnx_rx_ret_ring) * sc->bnx_rx_retcnt, M_DEVBUF,
3729 	    M_WAITOK | M_ZERO);
3730 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3731 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3732 		struct bnx_intr_data *intr;
3733 
3734 		ret->bnx_sc = sc;
3735 		ret->bnx_std = std;
3736 		ret->bnx_rx_mbx = mbx;
3737 		ret->bnx_rx_cntmax = (BGE_STD_RX_RING_CNT / 4) /
3738 		    sc->bnx_rx_retcnt;
3739 		ret->bnx_rx_mask = 1 << i;
3740 
3741 		if (sc->bnx_rx_retcnt == 1) {
3742 			intr = &sc->bnx_intr_data[0];
3743 		} else {
3744 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
3745 			intr = &sc->bnx_intr_data[i + 1];
3746 		}
3747 
3748 		ret->bnx_rx_considx =
3749 		    &intr->bnx_status_block->bge_idx[0].bge_rx_prod_idx;
3750 		ret->bnx_hw_status_tag =
3751 		    &intr->bnx_status_block->bge_status_tag;
3752 
3753 		error = bnx_create_rx_ret_ring(ret);
3754 		if (error) {
3755 			device_printf(dev,
3756 			    "could not create %dth RX ret ring\n", i);
3757 			return error;
3758 		}
3759 		mbx += 8;
3760 	}
3761 
3762 	/*
3763 	 * Create TX rings
3764 	 */
3765 	sc->bnx_tx_ring = kmalloc_cachealign(
3766 	    sizeof(struct bnx_tx_ring) * sc->bnx_tx_ringcnt, M_DEVBUF,
3767 	    M_WAITOK | M_ZERO);
3768 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3769 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3770 		struct bnx_intr_data *intr;
3771 
3772 		txr->bnx_sc = sc;
3773 		txr->bnx_tx_mbx = bnx_tx_mailbox[i];
3774 
3775 		if (sc->bnx_tx_ringcnt == 1) {
3776 			intr = &sc->bnx_intr_data[0];
3777 		} else {
3778 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
3779 			intr = &sc->bnx_intr_data[i + 1];
3780 		}
3781 
3782 		txr->bnx_tx_considx =
3783 		    &intr->bnx_status_block->bge_idx[0].bge_tx_cons_idx;
3784 
3785 		error = bnx_create_tx_ring(txr);
3786 		if (error) {
3787 			device_printf(dev,
3788 			    "could not create %dth TX ring\n", i);
3789 			return error;
3790 		}
3791 	}
3792 
3793 	/*
3794 	 * Create jumbo buffer pool.
3795 	 */
3796 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
3797 		error = bnx_alloc_jumbo_mem(sc);
3798 		if (error) {
3799 			device_printf(dev,
3800 			    "could not create jumbo buffer pool\n");
3801 			return error;
3802 		}
3803 	}
3804 
3805 	return 0;
3806 }
3807 
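/*
 * Allocate a coherent DMA memory block and return its tag, map,
 * kernel virtual address and bus address.
 */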
3808 static int
3809 bnx_dma_block_alloc(struct bnx_softc *sc, bus_size_t size, bus_dma_tag_t *tag,
3810 		    bus_dmamap_t *map, void **addr, bus_addr_t *paddr)
3811 {
3812 	bus_dmamem_t dmem;
3813 	int error;
3814 
3815 	error = bus_dmamem_coherent(sc->bnx_cdata.bnx_parent_tag, PAGE_SIZE, 0,
3816 				    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3817 				    size, BUS_DMA_WAITOK | BUS_DMA_ZERO, &dmem);
3818 	if (error)
3819 		return error;
3820 
3821 	*tag = dmem.dmem_tag;
3822 	*map = dmem.dmem_map;
3823 	*addr = dmem.dmem_addr;
3824 	*paddr = dmem.dmem_busaddr;
3825 
3826 	return 0;
3827 }
3828 
3829 static void
3830 bnx_dma_block_free(bus_dma_tag_t tag, bus_dmamap_t map, void *addr)
3831 {
3832 	if (tag != NULL) {
3833 		bus_dmamap_unload(tag, map);
3834 		bus_dmamem_free(tag, addr, map);
3835 		bus_dma_tag_destroy(tag);
3836 	}
3837 }
3838 
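/*
 * Link state update for TBI (fiber) interfaces.
 */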
3839 static void
3840 bnx_tbi_link_upd(struct bnx_softc *sc, uint32_t status)
3841 {
3842 	struct ifnet *ifp = &sc->arpcom.ac_if;
3843 
3844 #define PCS_ENCODE_ERR	(BGE_MACSTAT_PORT_DECODE_ERROR|BGE_MACSTAT_MI_COMPLETE)
3845 
3846 	/*
3847 	 * Sometimes PCS encoding errors are detected in
3848 	 * TBI mode (on fiber NICs), and for some reason
3849 	 * the chip will signal them as link changes.
3850 	 * If we get a link change event, but the 'PCS
3851 	 * encoding error' bit in the MAC status register
3852 	 * is set, don't bother doing a link check.
3853 	 * This avoids spurious "gigabit link up" messages
3854 	 * that sometimes appear on fiber NICs during
3855 	 * periods of heavy traffic.
3856 	 */
3857 	if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) {
3858 		if (!sc->bnx_link) {
3859 			sc->bnx_link++;
3860 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5704) {
3861 				BNX_CLRBIT(sc, BGE_MAC_MODE,
3862 				    BGE_MACMODE_TBI_SEND_CFGS);
3863 			}
3864 			CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF);
3865 
3866 			if (bootverbose)
3867 				if_printf(ifp, "link UP\n");
3868 
3869 			ifp->if_link_state = LINK_STATE_UP;
3870 			if_link_state_change(ifp);
3871 		}
3872 	} else if ((status & PCS_ENCODE_ERR) != PCS_ENCODE_ERR) {
3873 		if (sc->bnx_link) {
3874 			sc->bnx_link = 0;
3875 
3876 			if (bootverbose)
3877 				if_printf(ifp, "link DOWN\n");
3878 
3879 			ifp->if_link_state = LINK_STATE_DOWN;
3880 			if_link_state_change(ifp);
3881 		}
3882 	}
3883 
3884 #undef PCS_ENCODE_ERR
3885 
3886 	/* Clear the attention. */
3887 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
3888 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
3889 	    BGE_MACSTAT_LINK_CHANGED);
3890 }
3891 
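/*
 * Link state update for copper PHYs driven through the MII layer.
 */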
3892 static void
3893 bnx_copper_link_upd(struct bnx_softc *sc, uint32_t status __unused)
3894 {
3895 	struct ifnet *ifp = &sc->arpcom.ac_if;
3896 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3897 
3898 	mii_pollstat(mii);
3899 	bnx_miibus_statchg(sc->bnx_dev);
3900 
3901 	if (bootverbose) {
3902 		if (sc->bnx_link)
3903 			if_printf(ifp, "link UP\n");
3904 		else
3905 			if_printf(ifp, "link DOWN\n");
3906 	}
3907 
3908 	/* Clear the attention. */
3909 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
3910 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
3911 	    BGE_MACSTAT_LINK_CHANGED);
3912 }
3913 
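/*
 * Link state update when the chip's MI auto-polling is in use.
 */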
3914 static void
3915 bnx_autopoll_link_upd(struct bnx_softc *sc, uint32_t status __unused)
3916 {
3917 	struct ifnet *ifp = &sc->arpcom.ac_if;
3918 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3919 
3920 	mii_pollstat(mii);
3921 
3922 	if (!sc->bnx_link &&
3923 	    (mii->mii_media_status & IFM_ACTIVE) &&
3924 	    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
3925 		sc->bnx_link++;
3926 		if (bootverbose)
3927 			if_printf(ifp, "link UP\n");
3928 	} else if (sc->bnx_link &&
3929 	    (!(mii->mii_media_status & IFM_ACTIVE) ||
3930 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) {
3931 		sc->bnx_link = 0;
3932 		if (bootverbose)
3933 			if_printf(ifp, "link DOWN\n");
3934 	}
3935 
3936 	/* Clear the attention. */
3937 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
3938 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
3939 	    BGE_MACSTAT_LINK_CHANGED);
3940 }
3941 
3942 static int
3943 bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS)
3944 {
3945 	struct bnx_softc *sc = arg1;
3946 
3947 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
3948 	    &sc->bnx_rx_coal_ticks,
3949 	    BNX_RX_COAL_TICKS_MIN, BNX_RX_COAL_TICKS_MAX,
3950 	    BNX_RX_COAL_TICKS_CHG);
3951 }
3952 
3953 static int
3954 bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS)
3955 {
3956 	struct bnx_softc *sc = arg1;
3957 
3958 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
3959 	    &sc->bnx_tx_coal_ticks,
3960 	    BNX_TX_COAL_TICKS_MIN, BNX_TX_COAL_TICKS_MAX,
3961 	    BNX_TX_COAL_TICKS_CHG);
3962 }
3963 
3964 static int
3965 bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS)
3966 {
3967 	struct bnx_softc *sc = arg1;
3968 
3969 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
3970 	    &sc->bnx_rx_coal_bds,
3971 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
3972 	    BNX_RX_COAL_BDS_CHG);
3973 }
3974 
3975 static int
3976 bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS)
3977 {
3978 	struct bnx_softc *sc = arg1;
3979 
3980 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
3981 	    &sc->bnx_tx_coal_bds,
3982 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
3983 	    BNX_TX_COAL_BDS_CHG);
3984 }
3985 
3986 static int
3987 bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS)
3988 {
3989 	struct bnx_softc *sc = arg1;
3990 
3991 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
3992 	    &sc->bnx_rx_coal_bds_int,
3993 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
3994 	    BNX_RX_COAL_BDS_INT_CHG);
3995 }
3996 
3997 static int
3998 bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS)
3999 {
4000 	struct bnx_softc *sc = arg1;
4001 
4002 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4003 	    &sc->bnx_tx_coal_bds_int,
4004 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4005 	    BNX_TX_COAL_BDS_INT_CHG);
4006 }
4007 
4008 static int
4009 bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
4010     int coal_min, int coal_max, uint32_t coal_chg_mask)
4011 {
4012 	struct bnx_softc *sc = arg1;
4013 	struct ifnet *ifp = &sc->arpcom.ac_if;
4014 	int error = 0, v;
4015 
4016 	ifnet_serialize_all(ifp);
4017 
4018 	v = *coal;
4019 	error = sysctl_handle_int(oidp, &v, 0, req);
4020 	if (!error && req->newptr != NULL) {
4021 		if (v < coal_min || v > coal_max) {
4022 			error = EINVAL;
4023 		} else {
4024 			*coal = v;
4025 			sc->bnx_coal_chg |= coal_chg_mask;
4026 
4027 			/* Commit changes */
4028 			bnx_coal_change(sc);
4029 		}
4030 	}
4031 
4032 	ifnet_deserialize_all(ifp);
4033 	return error;
4034 }
4035 
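/*
 * Commit any pending interrupt coalescing changes to the host
 * coalescing engine registers.
 */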
4036 static void
4037 bnx_coal_change(struct bnx_softc *sc)
4038 {
4039 	struct ifnet *ifp = &sc->arpcom.ac_if;
4040 
4041 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4042 
4043 	if (sc->bnx_coal_chg & BNX_RX_COAL_TICKS_CHG) {
4044 		CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS,
4045 			    sc->bnx_rx_coal_ticks);
4046 		DELAY(10);
4047 		CSR_READ_4(sc, BGE_HCC_RX_COAL_TICKS);
4048 
4049 		if (bootverbose) {
4050 			if_printf(ifp, "rx_coal_ticks -> %u\n",
4051 				  sc->bnx_rx_coal_ticks);
4052 		}
4053 	}
4054 
4055 	if (sc->bnx_coal_chg & BNX_TX_COAL_TICKS_CHG) {
4056 		CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS,
4057 			    sc->bnx_tx_coal_ticks);
4058 		DELAY(10);
4059 		CSR_READ_4(sc, BGE_HCC_TX_COAL_TICKS);
4060 
4061 		if (bootverbose) {
4062 			if_printf(ifp, "tx_coal_ticks -> %u\n",
4063 				  sc->bnx_tx_coal_ticks);
4064 		}
4065 	}
4066 
4067 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_CHG) {
4068 		CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS,
4069 			    sc->bnx_rx_coal_bds);
4070 		DELAY(10);
4071 		CSR_READ_4(sc, BGE_HCC_RX_MAX_COAL_BDS);
4072 
4073 		if (bootverbose) {
4074 			if_printf(ifp, "rx_coal_bds -> %u\n",
4075 				  sc->bnx_rx_coal_bds);
4076 		}
4077 	}
4078 
4079 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_CHG) {
4080 		CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS,
4081 			    sc->bnx_tx_coal_bds);
4082 		DELAY(10);
4083 		CSR_READ_4(sc, BGE_HCC_TX_MAX_COAL_BDS);
4084 
4085 		if (bootverbose) {
4086 			if_printf(ifp, "tx_coal_bds -> %u\n",
4087 				  sc->bnx_tx_coal_bds);
4088 		}
4089 	}
4090 
4091 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_INT_CHG) {
4092 		CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT,
4093 		    sc->bnx_rx_coal_bds_int);
4094 		DELAY(10);
4095 		CSR_READ_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT);
4096 
4097 		if (bootverbose) {
4098 			if_printf(ifp, "rx_coal_bds_int -> %u\n",
4099 			    sc->bnx_rx_coal_bds_int);
4100 		}
4101 	}
4102 
4103 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_INT_CHG) {
4104 		CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT,
4105 		    sc->bnx_tx_coal_bds_int);
4106 		DELAY(10);
4107 		CSR_READ_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT);
4108 
4109 		if (bootverbose) {
4110 			if_printf(ifp, "tx_coal_bds_int -> %u\n",
4111 			    sc->bnx_tx_coal_bds_int);
4112 		}
4113 	}
4114 
4115 	sc->bnx_coal_chg = 0;
4116 }
4117 
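/*
 * Periodic callout used to detect lost interrupts: if the hardware
 * indices differ from the saved consumer indices but no progress has
 * been made since the previous check, invoke the interrupt handler
 * manually.
 */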
4118 static void
4119 bnx_check_intr(void *xintr)
4120 {
4121 	struct bnx_intr_data *intr = xintr;
4122 	struct bnx_rx_ret_ring *ret;
4123 	struct bnx_tx_ring *txr;
4124 	struct ifnet *ifp;
4125 
4126 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4127 
4128 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4129 
4130 	ifp = &intr->bnx_sc->arpcom.ac_if;
4131 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4132 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4133 		return;
4134 	}
4135 
4136 	txr = intr->bnx_txr;
4137 	ret = intr->bnx_ret;
4138 
4139 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx ||
4140 	    *txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4141 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx &&
4142 		    intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4143 			if (!intr->bnx_intr_maylose) {
4144 				intr->bnx_intr_maylose = TRUE;
4145 				goto done;
4146 			}
4147 			if (bootverbose)
4148 				if_printf(ifp, "lost interrupt\n");
4149 			intr->bnx_intr_func(intr->bnx_intr_arg);
4150 		}
4151 	}
4152 	intr->bnx_intr_maylose = FALSE;
4153 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4154 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4155 
4156 done:
4157 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4158 	    intr->bnx_intr_check, intr);
4159 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4160 }
4161 
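/*
 * Re-enable interrupt delivery: re-arm the interrupt mailboxes with
 * the saved status tags, unmask the PCI interrupt and, on chips with
 * the status tag bug, start the lost interrupt detection callouts.
 */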
4162 static void
4163 bnx_enable_intr(struct bnx_softc *sc)
4164 {
4165 	struct ifnet *ifp = &sc->arpcom.ac_if;
4166 	int i;
4167 
4168 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4169 		lwkt_serialize_handler_enable(
4170 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4171 	}
4172 
4173 	/*
4174 	 * Enable interrupt.
4175 	 */
4176 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4177 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4178 
4179 		bnx_writembx(sc, intr->bnx_intr_mbx,
4180 		    (*intr->bnx_saved_status_tag) << 24);
4181 		/* XXX Linux driver */
4182 		bnx_writembx(sc, intr->bnx_intr_mbx,
4183 		    (*intr->bnx_saved_status_tag) << 24);
4184 	}
4185 
4186 	/*
4187 	 * Unmask the interrupt when we stop polling.
4188 	 */
4189 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4190 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4191 
4192 	/*
4193 	 * Trigger another interrupt, since the writes to the
4194 	 * interrupt mailboxes above may have acknowledged a
4195 	 * pending interrupt.
4196 	 */
4197 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
4198 
4199 	if (sc->bnx_flags & BNX_FLAG_STATUSTAG_BUG) {
4200 		if (bootverbose)
4201 			if_printf(ifp, "status tag bug workaround\n");
4202 
4203 		for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4204 			struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4205 
4206 			intr->bnx_intr_maylose = FALSE;
4207 			intr->bnx_rx_check_considx = 0;
4208 			intr->bnx_tx_check_considx = 0;
4209 			callout_reset_bycpu(&intr->bnx_intr_timer,
4210 			    BNX_INTR_CKINTVL, intr->bnx_intr_check, intr,
4211 			    intr->bnx_intr_cpuid);
4212 		}
4213 	}
4214 }
4215 
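/*
 * Mask interrupt delivery (e.g. while polling), acknowledge any
 * asserted interrupt and stop the lost interrupt detection callouts.
 */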
4216 static void
4217 bnx_disable_intr(struct bnx_softc *sc)
4218 {
4219 	int i;
4220 
4221 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4222 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4223 
4224 		callout_stop(&intr->bnx_intr_timer);
4225 		intr->bnx_intr_maylose = FALSE;
4226 		intr->bnx_rx_check_considx = 0;
4227 		intr->bnx_tx_check_considx = 0;
4228 	}
4229 
4230 	/*
4231 	 * Mask the interrupt when we start polling.
4232 	 */
4233 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4234 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4235 
4236 	/*
4237 	 * Acknowledge possible asserted interrupt.
4238 	 */
4239 	for (i = 0; i < sc->bnx_intr_cnt; ++i)
4240 		bnx_writembx(sc, sc->bnx_intr_data[i].bnx_intr_mbx, 1);
4241 
4242 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4243 		lwkt_serialize_handler_disable(
4244 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4245 	}
4246 }
4247 
4248 static int
4249 bnx_get_eaddr_mem(struct bnx_softc *sc, uint8_t ether_addr[])
4250 {
4251 	uint32_t mac_addr;
4252 	int ret = 1;
4253 
4254 	mac_addr = bnx_readmem_ind(sc, 0x0c14);
4255 	if ((mac_addr >> 16) == 0x484b) {
4256 		ether_addr[0] = (uint8_t)(mac_addr >> 8);
4257 		ether_addr[1] = (uint8_t)mac_addr;
4258 		mac_addr = bnx_readmem_ind(sc, 0x0c18);
4259 		ether_addr[2] = (uint8_t)(mac_addr >> 24);
4260 		ether_addr[3] = (uint8_t)(mac_addr >> 16);
4261 		ether_addr[4] = (uint8_t)(mac_addr >> 8);
4262 		ether_addr[5] = (uint8_t)mac_addr;
4263 		ret = 0;
4264 	}
4265 	return ret;
4266 }
4267 
4268 static int
4269 bnx_get_eaddr_nvram(struct bnx_softc *sc, uint8_t ether_addr[])
4270 {
4271 	int mac_offset = BGE_EE_MAC_OFFSET;
4272 
4273 	if (BNX_IS_5717_PLUS(sc)) {
4274 		int f;
4275 
4276 		f = pci_get_function(sc->bnx_dev);
4277 		if (f & 1)
4278 			mac_offset = BGE_EE_MAC_OFFSET_5717;
4279 		if (f > 1)
4280 			mac_offset += BGE_EE_MAC_OFFSET_5717_OFF;
4281 	}
4282 
4283 	return bnx_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN);
4284 }
4285 
4286 static int
4287 bnx_get_eaddr_eeprom(struct bnx_softc *sc, uint8_t ether_addr[])
4288 {
4289 	if (sc->bnx_flags & BNX_FLAG_NO_EEPROM)
4290 		return 1;
4291 
4292 	return bnx_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2,
4293 			       ETHER_ADDR_LEN);
4294 }
4295 
4296 static int
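/*
 * Obtain the station address, trying chip memory, NVRAM and EEPROM
 * in that order.
 */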
4297 bnx_get_eaddr(struct bnx_softc *sc, uint8_t eaddr[])
4298 {
4299 	static const bnx_eaddr_fcn_t bnx_eaddr_funcs[] = {
4300 		/* NOTE: Order is critical */
4301 		bnx_get_eaddr_mem,
4302 		bnx_get_eaddr_nvram,
4303 		bnx_get_eaddr_eeprom,
4304 		NULL
4305 	};
4306 	const bnx_eaddr_fcn_t *func;
4307 
4308 	for (func = bnx_eaddr_funcs; *func != NULL; ++func) {
4309 		if ((*func)(sc, eaddr) == 0)
4310 			break;
4311 	}
4312 	return (*func == NULL ? ENXIO : 0);
4313 }
4314 
4315 /*
4316  * NOTE: 'm' is not freed upon failure
4317  */
4318 struct mbuf *
4319 bnx_defrag_shortdma(struct mbuf *m)
4320 {
4321 	struct mbuf *n;
4322 	int found;
4323 
4324 	/*
4325 	 * If the device receives two back-to-back send BDs with less
4326 	 * than or equal to 8 total bytes, it may hang.  The two
4327 	 * back-to-back send BDs must be in the same frame for this
4328 	 * failure to occur.  Scan the mbuf chain to see whether two
4329 	 * such back-to-back send BDs are present; if so, defragment
4330 	 * the chain into new mbufs to work around the silicon bug.
4331 	 */
4332 	for (n = m, found = 0; n != NULL; n = n->m_next) {
4333 		if (n->m_len < 8) {
4334 			found++;
4335 			if (found > 1)
4336 				break;
4337 			continue;
4338 		}
4339 		found = 0;
4340 	}
4341 
4342 	if (found > 1)
4343 		n = m_defrag(m, MB_DONTWAIT);
4344 	else
4345 		n = m;
4346 	return n;
4347 }
4348 
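/*
 * Clear the enable bit of a state machine block and wait for the
 * block to report that it has stopped.
 */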
4349 static void
4350 bnx_stop_block(struct bnx_softc *sc, bus_size_t reg, uint32_t bit)
4351 {
4352 	int i;
4353 
4354 	BNX_CLRBIT(sc, reg, bit);
4355 	for (i = 0; i < BNX_TIMEOUT; i++) {
4356 		if ((CSR_READ_4(sc, reg) & bit) == 0)
4357 			return;
4358 		DELAY(100);
4359 	}
4360 }
4361 
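/*
 * Check the MAC status register and run the link update handler if
 * a link change is indicated or has been requested.
 */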
4362 static void
4363 bnx_link_poll(struct bnx_softc *sc)
4364 {
4365 	uint32_t status;
4366 
4367 	status = CSR_READ_4(sc, BGE_MAC_STS);
4368 	if ((status & sc->bnx_link_chg) || sc->bnx_link_evt) {
4369 		sc->bnx_link_evt = 0;
4370 		sc->bnx_link_upd(sc, status);
4371 	}
4372 }
4373 
4374 static void
4375 bnx_enable_msi(struct bnx_softc *sc)
4376 {
4377 	uint32_t msi_mode;
4378 
4379 	msi_mode = CSR_READ_4(sc, BGE_MSI_MODE);
4380 	msi_mode |= BGE_MSIMODE_ENABLE;
4381 	/*
4382 	 * NOTE:
4383 	 * 5718-PG105-R says that "one shot" mode does not work
4384 	 * if MSI is used; in practice, however, it does work.
4385 	 */
4386 	msi_mode &= ~BGE_MSIMODE_ONESHOT_DISABLE;
4387 	CSR_WRITE_4(sc, BGE_MSI_MODE, msi_mode);
4388 }
4389 
4390 static uint32_t
4391 bnx_dma_swap_options(struct bnx_softc *sc)
4392 {
4393 	uint32_t dma_options;
4394 
4395 	dma_options = BGE_MODECTL_WORDSWAP_NONFRAME |
4396 	    BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA;
4397 #if BYTE_ORDER == BIG_ENDIAN
4398 	dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME;
4399 #endif
4400 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
4401 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
4402 		dma_options |= BGE_MODECTL_BYTESWAP_B2HRX_DATA |
4403 		    BGE_MODECTL_WORDSWAP_B2HRX_DATA | BGE_MODECTL_B2HRX_ENABLE |
4404 		    BGE_MODECTL_HTX2B_ENABLE;
4405 	}
4406 	return dma_options;
4407 }
4408 
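/*
 * Prepare an mbuf chain for TSO: make sure the protocol headers are
 * resident in the first mbuf and encode the header length into the
 * MSS and send BD flags expected by the chip.
 */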
4409 static int
4410 bnx_setup_tso(struct bnx_tx_ring *txr, struct mbuf **mp,
4411     uint16_t *mss0, uint16_t *flags0)
4412 {
4413 	struct mbuf *m;
4414 	struct ip *ip;
4415 	struct tcphdr *th;
4416 	int thoff, iphlen, hoff, hlen;
4417 	uint16_t flags, mss;
4418 
4419 	m = *mp;
4420 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
4421 
4422 	hoff = m->m_pkthdr.csum_lhlen;
4423 	iphlen = m->m_pkthdr.csum_iphlen;
4424 	thoff = m->m_pkthdr.csum_thlen;
4425 
4426 	KASSERT(hoff > 0, ("invalid ether header len"));
4427 	KASSERT(iphlen > 0, ("invalid ip header len"));
4428 	KASSERT(thoff > 0, ("invalid tcp header len"));
4429 
4430 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
4431 		m = m_pullup(m, hoff + iphlen + thoff);
4432 		if (m == NULL) {
4433 			*mp = NULL;
4434 			return ENOBUFS;
4435 		}
4436 		*mp = m;
4437 	}
4438 	ip = mtodoff(m, struct ip *, hoff);
4439 	th = mtodoff(m, struct tcphdr *, hoff + iphlen);
4440 
4441 	mss = m->m_pkthdr.tso_segsz;
4442 	flags = BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA;
4443 
4444 	ip->ip_len = htons(mss + iphlen + thoff);
4445 	th->th_sum = 0;
4446 
4447 	hlen = (iphlen + thoff) >> 2;
4448 	mss |= ((hlen & 0x3) << 14);
4449 	flags |= ((hlen & 0xf8) << 7) | ((hlen & 0x4) << 2);
4450 
4451 	*mss0 = mss;
4452 	*flags0 = flags;
4453 
4454 	return 0;
4455 }
4456 
4457 static int
4458 bnx_create_tx_ring(struct bnx_tx_ring *txr)
4459 {
4460 	bus_size_t txmaxsz, txmaxsegsz;
4461 	int i, error;
4462 
4463 	lwkt_serialize_init(&txr->bnx_tx_serialize);
4464 
4465 	/*
4466 	 * Create DMA tag and maps for TX mbufs.
4467 	 */
4468 	if (txr->bnx_sc->bnx_flags & BNX_FLAG_TSO)
4469 		txmaxsz = IP_MAXPACKET + sizeof(struct ether_vlan_header);
4470 	else
4471 		txmaxsz = BNX_JUMBO_FRAMELEN;
4472 	if (txr->bnx_sc->bnx_asicrev == BGE_ASICREV_BCM57766)
4473 		txmaxsegsz = MCLBYTES;
4474 	else
4475 		txmaxsegsz = PAGE_SIZE;
4476 	error = bus_dma_tag_create(txr->bnx_sc->bnx_cdata.bnx_parent_tag,
4477 	    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
4478 	    txmaxsz, BNX_NSEG_NEW, txmaxsegsz,
4479 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
4480 	    &txr->bnx_tx_mtag);
4481 	if (error) {
4482 		device_printf(txr->bnx_sc->bnx_dev,
4483 		    "could not create TX mbuf DMA tag\n");
4484 		return error;
4485 	}
4486 
4487 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
4488 		error = bus_dmamap_create(txr->bnx_tx_mtag,
4489 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
4490 		    &txr->bnx_tx_buf[i].bnx_tx_dmamap);
4491 		if (error) {
4492 			int j;
4493 
4494 			for (j = 0; j < i; ++j) {
4495 				bus_dmamap_destroy(txr->bnx_tx_mtag,
4496 				    txr->bnx_tx_buf[j].bnx_tx_dmamap);
4497 			}
4498 			bus_dma_tag_destroy(txr->bnx_tx_mtag);
4499 			txr->bnx_tx_mtag = NULL;
4500 
4501 			device_printf(txr->bnx_sc->bnx_dev,
4502 			    "could not create TX mbuf DMA map\n");
4503 			return error;
4504 		}
4505 	}
4506 
4507 	/*
4508 	 * Create DMA resources for the TX ring.
4509 	 */
4510 	error = bnx_dma_block_alloc(txr->bnx_sc, BGE_TX_RING_SZ,
4511 	    &txr->bnx_tx_ring_tag,
4512 	    &txr->bnx_tx_ring_map,
4513 	    (void *)&txr->bnx_tx_ring,
4514 	    &txr->bnx_tx_ring_paddr);
4515 	if (error) {
4516 		device_printf(txr->bnx_sc->bnx_dev,
4517 		    "could not create TX ring\n");
4518 		return error;
4519 	}
4520 
4521 	txr->bnx_tx_flags |= BNX_TX_FLAG_SHORTDMA;
4522 	txr->bnx_tx_wreg = BNX_TX_WREG_NSEGS;
4523 
4524 	return 0;
4525 }
4526 
4527 static void
4528 bnx_destroy_tx_ring(struct bnx_tx_ring *txr)
4529 {
4530 	/* Destroy TX mbuf DMA resources. */
4531 	if (txr->bnx_tx_mtag != NULL) {
4532 		int i;
4533 
4534 		for (i = 0; i < BGE_TX_RING_CNT; i++) {
4535 			KKASSERT(txr->bnx_tx_buf[i].bnx_tx_mbuf == NULL);
4536 			bus_dmamap_destroy(txr->bnx_tx_mtag,
4537 			    txr->bnx_tx_buf[i].bnx_tx_dmamap);
4538 		}
4539 		bus_dma_tag_destroy(txr->bnx_tx_mtag);
4540 	}
4541 
4542 	/* Destroy TX ring */
4543 	bnx_dma_block_free(txr->bnx_tx_ring_tag,
4544 	    txr->bnx_tx_ring_map, txr->bnx_tx_ring);
4545 }
4546 
4547 static int
4548 bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS)
4549 {
4550 	struct bnx_softc *sc = (void *)arg1;
4551 	struct ifnet *ifp = &sc->arpcom.ac_if;
4552 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
4553 	int error, defrag, i;
4554 
4555 	if (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG)
4556 		defrag = 1;
4557 	else
4558 		defrag = 0;
4559 
4560 	error = sysctl_handle_int(oidp, &defrag, 0, req);
4561 	if (error || req->newptr == NULL)
4562 		return error;
4563 
4564 	ifnet_serialize_all(ifp);
4565 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4566 		txr = &sc->bnx_tx_ring[i];
4567 		if (defrag)
4568 			txr->bnx_tx_flags |= BNX_TX_FLAG_FORCE_DEFRAG;
4569 		else
4570 			txr->bnx_tx_flags &= ~BNX_TX_FLAG_FORCE_DEFRAG;
4571 	}
4572 	ifnet_deserialize_all(ifp);
4573 
4574 	return 0;
4575 }
4576 
4577 static int
4578 bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS)
4579 {
4580 	struct bnx_softc *sc = (void *)arg1;
4581 	struct ifnet *ifp = &sc->arpcom.ac_if;
4582 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
4583 	int error, tx_wreg, i;
4584 
4585 	tx_wreg = txr->bnx_tx_wreg;
4586 	error = sysctl_handle_int(oidp, &tx_wreg, 0, req);
4587 	if (error || req->newptr == NULL)
4588 		return error;
4589 
4590 	ifnet_serialize_all(ifp);
4591 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4592 		sc->bnx_tx_ring[i].bnx_tx_wreg = tx_wreg;
4593 	ifnet_deserialize_all(ifp);
4594 
4595 	return 0;
4596 }
4597 
4598 static int
4599 bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *ret)
4600 {
4601 	int error;
4602 
4603 	lwkt_serialize_init(&ret->bnx_rx_ret_serialize);
4604 
4605 	/*
4606 	 * Create DMA resources for the RX return ring.
4607 	 */
4608 	error = bnx_dma_block_alloc(ret->bnx_sc,
4609 	    BGE_RX_RTN_RING_SZ(BNX_RETURN_RING_CNT),
4610 	    &ret->bnx_rx_ret_ring_tag,
4611 	    &ret->bnx_rx_ret_ring_map,
4612 	    (void *)&ret->bnx_rx_ret_ring,
4613 	    &ret->bnx_rx_ret_ring_paddr);
4614 	if (error) {
4615 		device_printf(ret->bnx_sc->bnx_dev,
4616 		    "could not create RX ret ring\n");
4617 		return error;
4618 	}
4619 
4620 	/* Shadow standard ring's RX mbuf DMA tag */
4621 	ret->bnx_rx_mtag = ret->bnx_std->bnx_rx_mtag;
4622 
4623 	/*
4624 	 * Create tmp DMA map for RX mbufs.
4625 	 */
4626 	error = bus_dmamap_create(ret->bnx_rx_mtag, BUS_DMA_WAITOK,
4627 	    &ret->bnx_rx_tmpmap);
4628 	if (error) {
4629 		device_printf(ret->bnx_sc->bnx_dev,
4630 		    "could not create tmp RX mbuf DMA map\n");
4631 		ret->bnx_rx_mtag = NULL;
4632 		return error;
4633 	}
4634 	return 0;
4635 }
4636 
4637 static void
4638 bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *ret)
4639 {
4640 	/* Destroy tmp RX mbuf DMA map */
4641 	if (ret->bnx_rx_mtag != NULL)
4642 		bus_dmamap_destroy(ret->bnx_rx_mtag, ret->bnx_rx_tmpmap);
4643 
4644 	/* Destroy RX return ring */
4645 	bnx_dma_block_free(ret->bnx_rx_ret_ring_tag,
4646 	    ret->bnx_rx_ret_ring_map, ret->bnx_rx_ret_ring);
4647 }
4648 
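/*
 * Allocate the single interrupt used by this configuration,
 * preferring MSI when it is available and enabled.
 */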
4649 static int
4650 bnx_alloc_intr(struct bnx_softc *sc)
4651 {
4652 	struct bnx_intr_data *intr;
4653 	u_int intr_flags;
4654 
4655 	KKASSERT(sc->bnx_intr_cnt == 1);
4656 
4657 	intr = &sc->bnx_intr_data[0];
4658 	intr->bnx_sc = sc;
4659 	intr->bnx_ret = &sc->bnx_rx_ret_ring[0];
4660 	intr->bnx_txr = &sc->bnx_tx_ring[0];
4661 	intr->bnx_intr_serialize = &sc->bnx_main_serialize;
4662 	callout_init_mp(&intr->bnx_intr_timer);
4663 	intr->bnx_intr_check = bnx_check_intr;
4664 	intr->bnx_saved_status_tag = &intr->bnx_ret->bnx_saved_status_tag;
4665 	intr->bnx_intr_mbx = BGE_MBX_IRQ0_LO;
4666 
4667 	sc->bnx_intr_type = pci_alloc_1intr(sc->bnx_dev, bnx_msi_enable,
4668 	    &intr->bnx_intr_rid, &intr_flags);
4669 
4670 	intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev, SYS_RES_IRQ,
4671 	    &intr->bnx_intr_rid, intr_flags);
4672 	if (intr->bnx_intr_res == NULL) {
4673 		device_printf(sc->bnx_dev, "could not alloc interrupt\n");
4674 		return ENXIO;
4675 	}
4676 
4677 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI) {
4678 		bnx_enable_msi(sc);
4679 		intr->bnx_intr_func = bnx_msi;
4680 		if (bootverbose)
4681 			device_printf(sc->bnx_dev, "oneshot MSI\n");
4682 	} else {
4683 		intr->bnx_intr_func = bnx_intr_legacy;
4684 	}
4685 	intr->bnx_intr_arg = sc;
4686 	intr->bnx_intr_cpuid = rman_get_cpuid(intr->bnx_intr_res);
4687 
4688 	intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
4689 
4690 	return 0;
4691 }
4692 
4693 static int
4694 bnx_setup_intr(struct bnx_softc *sc)
4695 {
4696 	int error, i;
4697 
4698 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4699 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4700 
4701 		error = bus_setup_intr_descr(sc->bnx_dev, intr->bnx_intr_res,
4702 		    INTR_MPSAFE, intr->bnx_intr_func, intr->bnx_intr_arg,
4703 		    &intr->bnx_intr_hand, intr->bnx_intr_serialize,
4704 		    intr->bnx_intr_desc);
4705 		if (error) {
4706 			device_printf(sc->bnx_dev,
4707 			    "could not set up %dth intr\n", i);
4708 			bnx_teardown_intr(sc, i);
4709 			return error;
4710 		}
4711 	}
4712 	return 0;
4713 }
4714 
4715 static void
4716 bnx_teardown_intr(struct bnx_softc *sc, int cnt)
4717 {
4718 	int i;
4719 
4720 	for (i = 0; i < cnt; ++i) {
4721 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4722 
4723 		bus_teardown_intr(sc->bnx_dev, intr->bnx_intr_res,
4724 		    intr->bnx_intr_hand);
4725 	}
4726 }
4727 
4728 static void
4729 bnx_free_intr(struct bnx_softc *sc)
4730 {
4731 	struct bnx_intr_data *intr;
4732 
4733 	KKASSERT(sc->bnx_intr_cnt <= 1);
4734 	intr = &sc->bnx_intr_data[0];
4735 
4736 	if (intr->bnx_intr_res != NULL) {
4737 		bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
4738 		    intr->bnx_intr_rid, intr->bnx_intr_res);
4739 	}
4740 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI)
4741 		pci_release_msi(sc->bnx_dev);
4742 }
4743 
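/*
 * Build the serializer array protecting the main state, the standard
 * RX ring and each RX return/TX ring; the array order is significant.
 */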
4744 static void
4745 bnx_setup_serialize(struct bnx_softc *sc)
4746 {
4747 	int i, j;
4748 
4749 	/*
4750 	 * Allocate serializer array
4751 	 */
4752 
4753 	/* Main + RX STD + TX + RX RET */
4754 	sc->bnx_serialize_cnt = 1 + 1 + sc->bnx_tx_ringcnt + sc->bnx_rx_retcnt;
4755 
4756 	sc->bnx_serialize =
4757 	    kmalloc(sc->bnx_serialize_cnt * sizeof(struct lwkt_serialize *),
4758 	        M_DEVBUF, M_WAITOK | M_ZERO);
4759 
4760 	/*
4761 	 * Setup serializers
4762 	 *
4763 	 * NOTE: Order is critical
4764 	 */
4765 
4766 	i = 0;
4767 
4768 	KKASSERT(i < sc->bnx_serialize_cnt);
4769 	sc->bnx_serialize[i++] = &sc->bnx_main_serialize;
4770 
4771 	KKASSERT(i < sc->bnx_serialize_cnt);
4772 	sc->bnx_serialize[i++] = &sc->bnx_rx_std_ring.bnx_rx_std_serialize;
4773 
4774 	for (j = 0; j < sc->bnx_rx_retcnt; ++j) {
4775 		KKASSERT(i < sc->bnx_serialize_cnt);
4776 		sc->bnx_serialize[i++] =
4777 		    &sc->bnx_rx_ret_ring[j].bnx_rx_ret_serialize;
4778 	}
4779 
4780 	for (j = 0; j < sc->bnx_tx_ringcnt; ++j) {
4781 		KKASSERT(i < sc->bnx_serialize_cnt);
4782 		sc->bnx_serialize[i++] =
4783 		    &sc->bnx_tx_ring[j].bnx_tx_serialize;
4784 	}
4785 
4786 	KKASSERT(i == sc->bnx_serialize_cnt);
4787 }
4788 
4789 static void
4790 bnx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4791 {
4792 	struct bnx_softc *sc = ifp->if_softc;
4793 
4794 	ifnet_serialize_array_enter(sc->bnx_serialize,
4795 	    sc->bnx_serialize_cnt, slz);
4796 }
4797 
4798 static void
4799 bnx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4800 {
4801 	struct bnx_softc *sc = ifp->if_softc;
4802 
4803 	ifnet_serialize_array_exit(sc->bnx_serialize,
4804 	    sc->bnx_serialize_cnt, slz);
4805 }
4806 
4807 static int
4808 bnx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4809 {
4810 	struct bnx_softc *sc = ifp->if_softc;
4811 
4812 	return ifnet_serialize_array_try(sc->bnx_serialize,
4813 	    sc->bnx_serialize_cnt, slz);
4814 }
4815 
4816 #ifdef INVARIANTS
4817 
4818 static void
4819 bnx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4820     boolean_t serialized)
4821 {
4822 	struct bnx_softc *sc = ifp->if_softc;
4823 
4824 	ifnet_serialize_array_assert(sc->bnx_serialize, sc->bnx_serialize_cnt,
4825 	    slz, serialized);
4826 }
4827 
4828 #endif	/* INVARIANTS */
4829 
4830 #ifdef IFPOLL_ENABLE
4831 
4832 static int
4833 bnx_sysctl_npoll_offset(SYSCTL_HANDLER_ARGS)
4834 {
4835 	struct bnx_softc *sc = (void *)arg1;
4836 	struct ifnet *ifp = &sc->arpcom.ac_if;
4837 	int error, off;
4838 
4839 	off = sc->bnx_npoll_rxoff;
4840 	error = sysctl_handle_int(oidp, &off, 0, req);
4841 	if (error || req->newptr == NULL)
4842 		return error;
4843 	if (off < 0)
4844 		return EINVAL;
4845 
4846 	ifnet_serialize_all(ifp);
4847 	if (off >= ncpus2 || off % sc->bnx_rx_retcnt != 0) {
4848 		error = EINVAL;
4849 	} else {
4850 		error = 0;
4851 		sc->bnx_npoll_txoff = off;
4852 		sc->bnx_npoll_rxoff = off;
4853 	}
4854 	ifnet_deserialize_all(ifp);
4855 
4856 	return error;
4857 }
4858 
4859 static int
4860 bnx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4861 {
4862 	struct bnx_softc *sc = (void *)arg1;
4863 	struct ifnet *ifp = &sc->arpcom.ac_if;
4864 	int error, off;
4865 
4866 	off = sc->bnx_npoll_rxoff;
4867 	error = sysctl_handle_int(oidp, &off, 0, req);
4868 	if (error || req->newptr == NULL)
4869 		return error;
4870 	if (off < 0)
4871 		return EINVAL;
4872 
4873 	ifnet_serialize_all(ifp);
4874 	if (off >= ncpus2 || off % sc->bnx_rx_retcnt != 0) {
4875 		error = EINVAL;
4876 	} else {
4877 		error = 0;
4878 		sc->bnx_npoll_rxoff = off;
4879 	}
4880 	ifnet_deserialize_all(ifp);
4881 
4882 	return error;
4883 }
4884 
4885 static int
4886 bnx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4887 {
4888 	struct bnx_softc *sc = (void *)arg1;
4889 	struct ifnet *ifp = &sc->arpcom.ac_if;
4890 	int error, off;
4891 
4892 	off = sc->bnx_npoll_txoff;
4893 	error = sysctl_handle_int(oidp, &off, 0, req);
4894 	if (error || req->newptr == NULL)
4895 		return error;
4896 	if (off < 0)
4897 		return EINVAL;
4898 
4899 	ifnet_serialize_all(ifp);
4900 	if (off >= ncpus2) {
4901 		error = EINVAL;
4902 	} else {
4903 		error = 0;
4904 		sc->bnx_npoll_txoff = off;
4905 	}
4906 	ifnet_deserialize_all(ifp);
4907 
4908 	return error;
4909 }
4910 
4911 #endif	/* IFPOLL_ENABLE */
4912 
4913 static void
4914 bnx_set_tick_cpuid(struct bnx_softc *sc, boolean_t polling)
4915 {
4916 	if (polling)
4917 		sc->bnx_tick_cpuid = 0; /* XXX */
4918 	else
4919 		sc->bnx_tick_cpuid = sc->bnx_intr_data[0].bnx_intr_cpuid;
4920 }
4921 
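/*
 * Dedicated ithread which refills the standard RX ring whenever an
 * RX return ring requests it; the thread stays descheduled while
 * there is no work.
 */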
4922 static void
4923 bnx_rx_std_refill_ithread(void *xstd)
4924 {
4925 	struct bnx_rx_std_ring *std = xstd;
4926 	struct globaldata *gd = mycpu;
4927 
4928 	crit_enter_gd(gd);
4929 
4930 	while (!std->bnx_rx_std_stop) {
4931 		if (std->bnx_rx_std_refill) {
4932 			lwkt_serialize_handler_call(
4933 			    &std->bnx_rx_std_serialize,
4934 			    bnx_rx_std_refill, std, NULL);
4935 		}
4936 
4937 		crit_exit_gd(gd);
4938 		crit_enter_gd(gd);
4939 
4940 		if (!std->bnx_rx_std_refill && !std->bnx_rx_std_stop) {
4941 			lwkt_deschedule_self(gd->gd_curthread);
4942 			lwkt_switch();
4943 		}
4944 	}
4945 
4946 	crit_exit_gd(gd);
4947 
4948 	wakeup(std);
4949 
4950 	lwkt_exit();
4951 }
4952 
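/*
 * Link refilled RX buffers back into the standard RX ring and inform
 * the chip of the new producer index.
 */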
4953 static void
4954 bnx_rx_std_refill(void *xstd, void *frame __unused)
4955 {
4956 	struct bnx_rx_std_ring *std = xstd;
4957 	uint16_t check_idx;
4958 	int cnt, refill;
4959 
4960 again:
4961 	cnt = 0;
4962 	check_idx = std->bnx_rx_std;
4963 
4964 	cpu_lfence();
4965 	refill = std->bnx_rx_std_refill;
4966 	atomic_clear_int(&std->bnx_rx_std_refill, refill);
4967 
4968 	for (;;) {
4969 		struct bnx_rx_buf *rb;
4970 
4971 		BNX_INC(check_idx, BGE_STD_RX_RING_CNT);
4972 		rb = &std->bnx_rx_std_buf[check_idx];
4973 
4974 		if (rb->bnx_rx_refilled) {
4975 			cpu_lfence();
4976 			bnx_setup_rxdesc_std(std, check_idx);
4977 			std->bnx_rx_std = check_idx;
4978 			++cnt;
4979 		} else {
4980 			break;
4981 		}
4982 	}
4983 
4984 	if (cnt) {
4985 		bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO,
4986 		    std->bnx_rx_std);
4987 	}
4988 
4989 	if (std->bnx_rx_std_refill)
4990 		goto again;
4991 
4992 	atomic_poll_release_int(&std->bnx_rx_std_running);
4993 	cpu_mfence();
4994 
4995 	if (std->bnx_rx_std_refill)
4996 		goto again;
4997 }
4998 
4999 static int
5000 bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS)
5001 {
5002 	struct bnx_softc *sc = (void *)arg1;
5003 	struct ifnet *ifp = &sc->arpcom.ac_if;
5004 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
5005 	int error, cntmax, i;
5006 
5007 	cntmax = ret->bnx_rx_cntmax;
5008 	error = sysctl_handle_int(oidp, &cntmax, 0, req);
5009 	if (error || req->newptr == NULL)
5010 		return error;
5011 
5012 	ifnet_serialize_all(ifp);
5013 
5014 	if ((cntmax * sc->bnx_rx_retcnt) > BGE_STD_RX_RING_CNT / 2) {
5015 		error = EINVAL;
5016 		goto back;
5017 	}
5018 
5019 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
5020 		sc->bnx_rx_ret_ring[i].bnx_rx_cntmax = cntmax;
5021 	error = 0;
5022 
5023 back:
5024 	ifnet_deserialize_all(ifp);
5025 
5026 	return error;
5027 }
5028