xref: /netbsd-src/sys/dev/pci/if_xge.c (revision 7f21db1c0118155e0dd40b75182e30c589d9f63e)
1 /*      $NetBSD: if_xge.c,v 1.14 2010/01/19 22:07:02 pooka Exp $ */
2 
3 /*
4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
5  * All rights reserved.
6  *
7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      SUNET, Swedish University Computer Network.
21  * 4. The name of SUNET may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 /*
38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
39  *
40  * TODO (in no specific order):
41  *	HW VLAN support.
42  *	IPv6 HW cksum.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.14 2010/01/19 22:07:02 pooka Exp $");
47 
48 #include "rnd.h"
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/mbuf.h>
53 #include <sys/malloc.h>
54 #include <sys/kernel.h>
55 #include <sys/socket.h>
56 #include <sys/device.h>
57 
58 #if NRND > 0
59 #include <sys/rnd.h>
60 #endif
61 
62 #include <net/if.h>
63 #include <net/if_dl.h>
64 #include <net/if_media.h>
65 #include <net/if_ether.h>
66 
67 #include <net/bpf.h>
68 
69 #include <sys/bus.h>
70 #include <sys/intr.h>
71 #include <machine/endian.h>
72 
73 #include <dev/mii/mii.h>
74 #include <dev/mii/miivar.h>
75 
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcidevs.h>
79 
80 #include <sys/proc.h>
81 
82 #include <dev/pci/if_xgereg.h>
83 
/*
 * Some tunable constants, tune with care!
 */
#define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
#define NRXDESCS	1016	   /* # of receive descriptors (requested) */
#define NTXDESCS	8192	   /* Number of transmit descriptors */
#define NTXFRAGS	100	   /* Max fragments per packet */
#define XGE_EVENT_COUNTERS	   /* Instrumentation */

/*
 * Receive buffer modes; 1, 3 or 5 buffers.
 */
#define RX_MODE_1 1
#define RX_MODE_3 3
#define RX_MODE_5 5

/*
 * Use clever macros to avoid a bunch of #ifdef's.
 * The selected RX_MODE digit is pasted into identifier names, so the
 * descriptor-per-page count, the descriptor page type and the descriptor
 * type all follow the RX_MODE setting above.
 */
#define XCONCAT3(x,y,z) x ## y ## z
#define CONCAT3(x,y,z) XCONCAT3(x,y,z)
#define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
#define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
#define rxdesc ___CONCAT(rxd,RX_MODE)

/*
 * Ring arithmetic.
 * NEXTTX wraps at NTXDESCS.  NRXPAGES is one more than the integer
 * quotient NRXDESCS/NDESC_BUFMODE, and NRXREAL is the number of receive
 * descriptors that those pages actually hold.
 */
#define NEXTTX(x)	(((x)+1) % NTXDESCS)
#define NRXFRAGS	RX_MODE /* hardware imposed frags */
#define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
#define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
#define RXMAPSZ		(NRXPAGES*PAGE_SIZE)

/*
 * Event counter bump.  Both variants expand to a single parenthesized
 * expression so the macro behaves like a function call wherever it is used.
 */
#ifdef XGE_EVENT_COUNTERS
#define XGE_EVCNT_INCR(ev)	((ev)->ev_count++)
#else
#define XGE_EVCNT_INCR(ev)	((void)0)
#endif
120 
/*
 * Magics to fix a bug when the mac address can't be read correctly.
 * Comes from the Linux driver.
 *
 * The values are written one after another to GPIO_CONTROL during attach;
 * the table is never modified, hence const.
 */
static const uint64_t fix_mac[] = {
	0x0060000000000000ULL, 0x0060600000000000ULL,
	0x0040600000000000ULL, 0x0000600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0000600000000000ULL,
	0x0040600000000000ULL, 0x0060600000000000ULL,
};
141 
142 
143 struct xge_softc {
144 	struct device sc_dev;
145 	struct ethercom sc_ethercom;
146 #define sc_if sc_ethercom.ec_if
147 	bus_dma_tag_t sc_dmat;
148 	bus_space_tag_t sc_st;
149 	bus_space_handle_t sc_sh;
150 	bus_space_tag_t sc_txt;
151 	bus_space_handle_t sc_txh;
152 	void *sc_ih;
153 
154 	struct ifmedia xena_media;
155 	pcireg_t sc_pciregs[16];
156 
157 	/* Transmit structures */
158 	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
159 	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
160 	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
161 	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
162 	int sc_nexttx, sc_lasttx;
163 	bus_dmamap_t sc_txmap;		/* transmit descriptor map */
164 
165 	/* Receive data */
166 	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
167 	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
168 	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
169 	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
170 	int sc_nextrx;			/* next descriptor to check */
171 
172 #ifdef XGE_EVENT_COUNTERS
173 	struct evcnt sc_intr;	/* # of interrupts */
174 	struct evcnt sc_txintr;	/* # of transmit interrupts */
175 	struct evcnt sc_rxintr;	/* # of receive interrupts */
176 	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
177 #endif
178 };
179 
180 static int xge_match(device_t parent, cfdata_t cf, void *aux);
181 static void xge_attach(device_t parent, device_t self, void *aux);
182 static int xge_alloc_txmem(struct xge_softc *);
183 static int xge_alloc_rxmem(struct xge_softc *);
184 static void xge_start(struct ifnet *);
185 static void xge_stop(struct ifnet *, int);
186 static int xge_add_rxbuf(struct xge_softc *, int);
187 static void xge_mcast_filter(struct xge_softc *sc);
188 static int xge_setup_xgxs(struct xge_softc *sc);
189 static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
190 static int xge_init(struct ifnet *ifp);
191 static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
192 static int xge_xgmii_mediachange(struct ifnet *);
193 static int xge_intr(void  *);
194 
195 /*
196  * Helpers to address registers.
197  */
198 #define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
199 #define PIF_RCSR(csr)		pif_rcsr(sc, csr)
200 #define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
201 #define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
202 
203 static inline void
204 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
205 {
206 	uint32_t lval, hval;
207 
208 	lval = val&0xffffffff;
209 	hval = val>>32;
210 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
211 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
212 }
213 
214 static inline uint64_t
215 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
216 {
217 	uint64_t val, val2;
218 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
219 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
220 	val |= (val2 << 32);
221 	return val;
222 }
223 
224 static inline void
225 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
226 {
227 	uint32_t lval, hval;
228 
229 	lval = val&0xffffffff;
230 	hval = val>>32;
231 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
232 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
233 }
234 
235 
236 static inline void
237 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
238 {
239 	uint32_t lval, hval;
240 
241 	lval = val&0xffffffff;
242 	hval = val>>32;
243 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
244 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
245 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
246 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
247 }
248 
249 
250 CFATTACH_DECL(xge, sizeof(struct xge_softc),
251     xge_match, xge_attach, NULL, NULL);
252 
253 #define XNAME device_xname(&sc->sc_dev)
254 
255 #define XGE_RXSYNC(desc, what) \
256 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
257 	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
258 	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
259 #define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
260 	r4_rxd[desc%NDESC_BUFMODE]
261 
262 /*
263  * Non-tunable constants.
264  */
265 #define XGE_MAX_MTU		9600
266 #define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
267 
268 static int
269 xge_match(device_t parent, cfdata_t cf, void *aux)
270 {
271 	struct pci_attach_args *pa = aux;
272 
273 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
274 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
275 		return (1);
276 
277 	return (0);
278 }
279 
280 void
281 xge_attach(device_t parent, device_t self, void *aux)
282 {
283 	struct pci_attach_args *pa = aux;
284 	struct xge_softc *sc;
285 	struct ifnet *ifp;
286 	pcireg_t memtype;
287 	pci_intr_handle_t ih;
288 	const char *intrstr = NULL;
289 	pci_chipset_tag_t pc = pa->pa_pc;
290 	uint8_t enaddr[ETHER_ADDR_LEN];
291 	uint64_t val;
292 	int i;
293 
294 	sc = device_private(self);
295 
296 	sc->sc_dmat = pa->pa_dmat;
297 
298 	/* Get BAR0 address */
299 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
300 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
301 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
302 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
303 		return;
304 	}
305 
306 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
307 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
308 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
309 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
310 		return;
311 	}
312 
313 	/* Save PCI config space */
314 	for (i = 0; i < 64; i += 4)
315 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
316 
317 #if BYTE_ORDER == LITTLE_ENDIAN
318 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
319 	val &= ~(TxF_R_SE|RxF_W_SE);
320 	PIF_WCSR(SWAPPER_CTRL, val);
321 	PIF_WCSR(SWAPPER_CTRL, val);
322 #elif BYTE_ORDER == BIG_ENDIAN
323 	/* do nothing */
324 #else
325 #error bad endianness!
326 #endif
327 
328 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
329 		return printf("%s: failed configuring endian, %llx != %llx!\n",
330 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
331 
332 	/*
333 	 * The MAC addr may be all FF's, which is not good.
334 	 * Resolve it by writing some magics to GPIO_CONTROL and
335 	 * force a chip reset to read in the serial eeprom again.
336 	 */
337 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
338 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
339 		PIF_RCSR(GPIO_CONTROL);
340 	}
341 
342 	/*
343 	 * Reset the chip and restore the PCI registers.
344 	 */
345 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
346 	DELAY(500000);
347 	for (i = 0; i < 64; i += 4)
348 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
349 
350 	/*
351 	 * Restore the byte order registers.
352 	 */
353 #if BYTE_ORDER == LITTLE_ENDIAN
354 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
355 	val &= ~(TxF_R_SE|RxF_W_SE);
356 	PIF_WCSR(SWAPPER_CTRL, val);
357 	PIF_WCSR(SWAPPER_CTRL, val);
358 #elif BYTE_ORDER == BIG_ENDIAN
359 	/* do nothing */
360 #else
361 #error bad endianness!
362 #endif
363 
364 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
365 		return printf("%s: failed configuring endian2, %llx != %llx!\n",
366 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
367 
368 	/*
369 	 * XGXS initialization.
370 	 */
371 	/* 29, reset */
372 	PIF_WCSR(SW_RESET, 0);
373 	DELAY(500000);
374 
375 	/* 30, configure XGXS transceiver */
376 	xge_setup_xgxs(sc);
377 
378 	/* 33, program MAC address (not needed here) */
379 	/* Get ethernet address */
380 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
381 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
382 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
383 		;
384 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
385 	for (i = 0; i < ETHER_ADDR_LEN; i++)
386 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
387 
388 	/*
389 	 * Get memory for transmit descriptor lists.
390 	 */
391 	if (xge_alloc_txmem(sc))
392 		return printf("%s: failed allocating txmem.\n", XNAME);
393 
394 	/* 9 and 10 - set FIFO number/prio */
395 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
396 	PIF_WCSR(TX_FIFO_P1, 0ULL);
397 	PIF_WCSR(TX_FIFO_P2, 0ULL);
398 	PIF_WCSR(TX_FIFO_P3, 0ULL);
399 
400 	/* 11, XXX set round-robin prio? */
401 
402 	/* 12, enable transmit FIFO */
403 	val = PIF_RCSR(TX_FIFO_P0);
404 	val |= TX_FIFO_ENABLE;
405 	PIF_WCSR(TX_FIFO_P0, val);
406 
407 	/* 13, disable some error checks */
408 	PIF_WCSR(TX_PA_CFG,
409 	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);
410 
411 	/*
412 	 * Create transmit DMA maps.
413 	 * Make them large for TSO.
414 	 */
415 	for (i = 0; i < NTXDESCS; i++) {
416 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
417 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i]))
418 			return printf("%s: cannot create TX DMA maps\n", XNAME);
419 	}
420 
421 	sc->sc_lasttx = NTXDESCS-1;
422 
423 	/*
424 	 * RxDMA initialization.
425 	 * Only use one out of 8 possible receive queues.
426 	 */
427 	if (xge_alloc_rxmem(sc))	/* allocate rx descriptor memory */
428 		return printf("%s: failed allocating rxmem\n", XNAME);
429 
430 	/* Create receive buffer DMA maps */
431 	for (i = 0; i < NRXREAL; i++) {
432 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
433 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i]))
434 			return printf("%s: cannot create RX DMA maps\n", XNAME);
435 	}
436 
437 	/* allocate mbufs to receive descriptors */
438 	for (i = 0; i < NRXREAL; i++)
439 		if (xge_add_rxbuf(sc, i))
440 			panic("out of mbufs too early");
441 
442 	/* 14, setup receive ring priority */
443 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
444 
445 	/* 15, setup receive ring round-robin calendar */
446 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
447 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
448 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
449 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
450 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
451 
452 	/* 16, write receive ring start address */
453 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
454 	/* PRC_RXD0_[1-7] are not used */
455 
456 	/* 17, Setup alarm registers */
457 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
458 
459 	/* 18, init receive ring controller */
460 #if RX_MODE == RX_MODE_1
461 	val = RING_MODE_1;
462 #elif RX_MODE == RX_MODE_3
463 	val = RING_MODE_3;
464 #else /* RX_MODE == RX_MODE_5 */
465 	val = RING_MODE_5;
466 #endif
467 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
468 	/* leave 1-7 disabled */
469 	/* XXXX snoop configuration? */
470 
471 	/* 19, set chip memory assigned to the queue */
472 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
473 
474 	/* 20, setup RLDRAM parameters */
475 	/* do not touch it for now */
476 
477 	/* 21, setup pause frame thresholds */
478 	/* so not touch the defaults */
479 	/* XXX - must 0xff be written as stated in the manual? */
480 
481 	/* 22, configure RED */
482 	/* we do not want to drop packets, so ignore */
483 
484 	/* 23, initiate RLDRAM */
485 	val = PIF_RCSR(MC_RLDRAM_MRS);
486 	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
487 	PIF_WCSR(MC_RLDRAM_MRS, val);
488 	DELAY(1000);
489 
490 	/*
491 	 * Setup interrupt policies.
492 	 */
493 	/* 40, Transmit interrupts */
494 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
495 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
496 	PIF_WCSR(TTI_DATA2_MEM,
497 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
498 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
499 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
500 		;
501 
502 	/* 41, Receive interrupts */
503 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
504 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
505 	PIF_WCSR(RTI_DATA2_MEM,
506 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
507 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
508 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
509 		;
510 
511 	/*
512 	 * Setup media stuff.
513 	 */
514 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
515 	    xge_ifmedia_status);
516 	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
517 	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);
518 
519 	aprint_normal("%s: Ethernet address %s\n", XNAME,
520 	    ether_sprintf(enaddr));
521 
522 	ifp = &sc->sc_ethercom.ec_if;
523 	strlcpy(ifp->if_xname, device_xname(&sc->sc_dev), IFNAMSIZ);
524 	ifp->if_baudrate = 10000000000LL;
525 	ifp->if_init = xge_init;
526 	ifp->if_stop = xge_stop;
527 	ifp->if_softc = sc;
528 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
529 	ifp->if_ioctl = xge_ioctl;
530 	ifp->if_start = xge_start;
531 	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
532 	IFQ_SET_READY(&ifp->if_snd);
533 
534 	/*
535 	 * Offloading capabilities.
536 	 */
537 	sc->sc_ethercom.ec_capabilities |=
538 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
539 	ifp->if_capabilities |=
540 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
541 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
542 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
543 
544 	/*
545 	 * Attach the interface.
546 	 */
547 	if_attach(ifp);
548 	ether_ifattach(ifp, enaddr);
549 
550 	/*
551 	 * Setup interrupt vector before initializing.
552 	 */
553 	if (pci_intr_map(pa, &ih))
554 		return aprint_error_dev(&sc->sc_dev, "unable to map interrupt\n");
555 	intrstr = pci_intr_string(pc, ih);
556 	if ((sc->sc_ih =
557 	    pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL)
558 		return aprint_error_dev(&sc->sc_dev, "unable to establish interrupt at %s\n",
559 		    intrstr ? intrstr : "<unknown>");
560 	aprint_normal_dev(&sc->sc_dev, "interrupting at %s\n", intrstr);
561 
562 #ifdef XGE_EVENT_COUNTERS
563 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
564 	    NULL, XNAME, "intr");
565 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
566 	    NULL, XNAME, "txintr");
567 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
568 	    NULL, XNAME, "rxintr");
569 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
570 	    NULL, XNAME, "txqe");
571 #endif
572 }
573 
574 void
575 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
576 {
577 	struct xge_softc *sc = ifp->if_softc;
578 	uint64_t reg;
579 
580 	ifmr->ifm_status = IFM_AVALID;
581 	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;
582 
583 	reg = PIF_RCSR(ADAPTER_STATUS);
584 	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
585 		ifmr->ifm_status |= IFM_ACTIVE;
586 }
587 
/*
 * Media change callback.
 * Does nothing and always reports success.
 */
int
xge_xgmii_mediachange(struct ifnet *ifp)
{
	(void)ifp;	/* unused */

	return (0);
}
593 
594 static void
595 xge_enable(struct xge_softc *sc)
596 {
597 	uint64_t val;
598 
599 	/* 2, enable adapter */
600 	val = PIF_RCSR(ADAPTER_CONTROL);
601 	val |= ADAPTER_EN;
602 	PIF_WCSR(ADAPTER_CONTROL, val);
603 
604 	/* 3, light the card enable led */
605 	val = PIF_RCSR(ADAPTER_CONTROL);
606 	val |= LED_ON;
607 	PIF_WCSR(ADAPTER_CONTROL, val);
608 	printf("%s: link up\n", XNAME);
609 
610 }
611 
612 int
613 xge_init(struct ifnet *ifp)
614 {
615 	struct xge_softc *sc = ifp->if_softc;
616 	uint64_t val;
617 
618 	if (ifp->if_flags & IFF_RUNNING)
619 		return 0;
620 
621 	/* 31+32, setup MAC config */
622 	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
623 	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);
624 
625 	DELAY(1000);
626 
627 	/* 54, ensure that the adapter is 'quiescent' */
628 	val = PIF_RCSR(ADAPTER_STATUS);
629 	if ((val & QUIESCENT) != QUIESCENT) {
630 		char buf[200];
631 		printf("%s: adapter not quiescent, aborting\n", XNAME);
632 		val = (val & QUIESCENT) ^ QUIESCENT;
633 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
634 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
635 		return 1;
636 	}
637 
638 	/* 56, enable the transmit laser */
639 	val = PIF_RCSR(ADAPTER_CONTROL);
640 	val |= EOI_TX_ON;
641 	PIF_WCSR(ADAPTER_CONTROL, val);
642 
643 	xge_enable(sc);
644 	/*
645 	 * Enable all interrupts
646 	 */
647 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
648 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
649 	PIF_WCSR(GENERAL_INT_MASK, 0);
650 	PIF_WCSR(TXPIC_INT_MASK, 0);
651 	PIF_WCSR(RXPIC_INT_MASK, 0);
652 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
653 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
654 
655 
656 	/* Done... */
657 	ifp->if_flags |= IFF_RUNNING;
658 	ifp->if_flags &= ~IFF_OACTIVE;
659 
660 	return 0;
661 }
662 
663 static void
664 xge_stop(struct ifnet *ifp, int disable)
665 {
666 	struct xge_softc *sc = ifp->if_softc;
667 	uint64_t val;
668 
669 	val = PIF_RCSR(ADAPTER_CONTROL);
670 	val &= ~ADAPTER_EN;
671 	PIF_WCSR(ADAPTER_CONTROL, val);
672 
673 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
674 		;
675 }
676 
677 int
678 xge_intr(void *pv)
679 {
680 	struct xge_softc *sc = pv;
681 	struct txd *txd;
682 	struct ifnet *ifp = &sc->sc_if;
683 	bus_dmamap_t dmp;
684 	uint64_t val;
685 	int i, lasttx, plen;
686 
687 	val = PIF_RCSR(GENERAL_INT_STATUS);
688 	if (val == 0)
689 		return 0; /* no interrupt here */
690 
691 	XGE_EVCNT_INCR(&sc->sc_intr);
692 
693 	PIF_WCSR(GENERAL_INT_STATUS, val);
694 
695 	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
696 		/* Wait for quiescence */
697 		printf("%s: link down\n", XNAME);
698 		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
699 			;
700 		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);
701 
702 		val = PIF_RCSR(ADAPTER_STATUS);
703 		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
704 			xge_enable(sc); /* Only if link restored */
705 	}
706 
707 	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
708 		XGE_EVCNT_INCR(&sc->sc_txintr);
709 		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
710 	}
711 	/*
712 	 * Collect sent packets.
713 	 */
714 	lasttx = sc->sc_lasttx;
715 	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
716 		txd = sc->sc_txd[i];
717 		dmp = sc->sc_txm[i];
718 
719 		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
720 		    dmp->dm_mapsize,
721 		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
722 
723 		if (txd->txd_control1 & TXD_CTL1_OWN) {
724 			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
725 			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
726 			break;
727 		}
728 		bus_dmamap_unload(sc->sc_dmat, dmp);
729 		m_freem(sc->sc_txb[i]);
730 		ifp->if_opackets++;
731 		sc->sc_lasttx = i;
732 	}
733 	if (i == sc->sc_nexttx) {
734 		XGE_EVCNT_INCR(&sc->sc_txqe);
735 	}
736 
737 	if (sc->sc_lasttx != lasttx)
738 		ifp->if_flags &= ~IFF_OACTIVE;
739 
740 	xge_start(ifp); /* Try to get more packets on the wire */
741 
742 	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
743 		XGE_EVCNT_INCR(&sc->sc_rxintr);
744 		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
745 	}
746 
747 	for (;;) {
748 		struct rxdesc *rxd;
749 		struct mbuf *m;
750 
751 		XGE_RXSYNC(sc->sc_nextrx,
752 		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
753 
754 		rxd = XGE_RXD(sc->sc_nextrx);
755 		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
756 			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
757 			break;
758 		}
759 
760 		/* got a packet */
761 		m = sc->sc_rxb[sc->sc_nextrx];
762 #if RX_MODE == RX_MODE_1
763 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
764 #elif RX_MODE == RX_MODE_3
765 #error Fix rxmodes in xge_intr
766 #elif RX_MODE == RX_MODE_5
767 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
768 		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
769 		plen += m->m_next->m_next->m_len =
770 		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
771 		plen += m->m_next->m_next->m_next->m_len =
772 		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
773 		plen += m->m_next->m_next->m_next->m_next->m_len =
774 		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
775 #endif
776 		m->m_pkthdr.rcvif = ifp;
777 		m->m_pkthdr.len = plen;
778 
779 		val = rxd->rxd_control1;
780 
781 		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
782 			/* Failed, recycle this mbuf */
783 #if RX_MODE == RX_MODE_1
784 			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
785 			rxd->rxd_control1 = RXD_CTL1_OWN;
786 #elif RX_MODE == RX_MODE_3
787 #elif RX_MODE == RX_MODE_5
788 #endif
789 			XGE_RXSYNC(sc->sc_nextrx,
790 			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
791 			ifp->if_ierrors++;
792 			break;
793 		}
794 
795 		ifp->if_ipackets++;
796 
797 		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
798 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
799 			if (RXD_CTL1_L3CSUM(val) != 0xffff)
800 				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
801 		}
802 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
803 			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
804 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
805 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
806 		}
807 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
808 			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
809 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
810 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
811 		}
812 
813 		if (ifp->if_bpf)
814 			bpf_ops->bpf_mtap(ifp->if_bpf, m);
815 
816 		(*ifp->if_input)(ifp, m);
817 
818 		if (++sc->sc_nextrx == NRXREAL)
819 			sc->sc_nextrx = 0;
820 
821 	}
822 
823 	return 0;
824 }
825 
826 int
827 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
828 {
829 	struct xge_softc *sc = ifp->if_softc;
830 	struct ifreq *ifr = (struct ifreq *) data;
831 	int s, error = 0;
832 
833 	s = splnet();
834 
835 	switch (cmd) {
836 	case SIOCSIFMTU:
837 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
838 			error = EINVAL;
839 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET){
840 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
841 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
842 			error = 0;
843 		}
844 		break;
845 
846 	case SIOCGIFMEDIA:
847 	case SIOCSIFMEDIA:
848 		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
849 		break;
850 
851 	default:
852 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
853 			break;
854 
855 		error = 0;
856 
857 		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
858 			;
859 		else if (ifp->if_flags & IFF_RUNNING) {
860 			/* Change multicast list */
861 			xge_mcast_filter(sc);
862 		}
863 		break;
864 	}
865 
866 	splx(s);
867 	return(error);
868 }
869 
870 void
871 xge_mcast_filter(struct xge_softc *sc)
872 {
873 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
874 	struct ethercom *ec = &sc->sc_ethercom;
875 	struct ether_multi *enm;
876 	struct ether_multistep step;
877 	int i, numaddr = 1; /* first slot used for card unicast address */
878 	uint64_t val;
879 
880 	ETHER_FIRST_MULTI(step, ec, enm);
881 	while (enm != NULL) {
882 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
883 			/* Skip ranges */
884 			goto allmulti;
885 		}
886 		if (numaddr == MAX_MCAST_ADDR)
887 			goto allmulti;
888 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
889 			val <<= 8;
890 			val |= enm->enm_addrlo[i];
891 		}
892 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
893 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
894 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
895 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
896 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
897 			;
898 		numaddr++;
899 		ETHER_NEXT_MULTI(step, enm);
900 	}
901 	/* set the remaining entries to the broadcast address */
902 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
903 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
904 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
905 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
906 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
907 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
908 			;
909 	}
910 	ifp->if_flags &= ~IFF_ALLMULTI;
911 	return;
912 
913 allmulti:
914 	/* Just receive everything with the multicast bit set */
915 	ifp->if_flags |= IFF_ALLMULTI;
916 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
917 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
918 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
919 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
920 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
921 		;
922 }
923 
924 void
925 xge_start(struct ifnet *ifp)
926 {
927 	struct xge_softc *sc = ifp->if_softc;
928 	struct txd *txd = NULL; /* XXX - gcc */
929 	bus_dmamap_t dmp;
930 	struct	mbuf *m;
931 	uint64_t par, lcr;
932 	int nexttx = 0, ntxd, error, i;
933 
934 	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
935 		return;
936 
937 	par = lcr = 0;
938 	for (;;) {
939 		IFQ_POLL(&ifp->if_snd, m);
940 		if (m == NULL)
941 			break;	/* out of packets */
942 
943 		if (sc->sc_nexttx == sc->sc_lasttx)
944 			break;	/* No more space */
945 
946 		nexttx = sc->sc_nexttx;
947 		dmp = sc->sc_txm[nexttx];
948 
949 		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
950 		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
951 			printf("%s: bus_dmamap_load_mbuf error %d\n",
952 			    XNAME, error);
953 			break;
954 		}
955 		IFQ_DEQUEUE(&ifp->if_snd, m);
956 
957 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
958 		    BUS_DMASYNC_PREWRITE);
959 
960 		txd = sc->sc_txd[nexttx];
961 		sc->sc_txb[nexttx] = m;
962 		for (i = 0; i < dmp->dm_nsegs; i++) {
963 			if (dmp->dm_segs[i].ds_len == 0)
964 				continue;
965 			txd->txd_control1 = dmp->dm_segs[i].ds_len;
966 			txd->txd_control2 = 0;
967 			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
968 			txd++;
969 		}
970 		ntxd = txd - sc->sc_txd[nexttx] - 1;
971 		txd = sc->sc_txd[nexttx];
972 		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
973 		txd->txd_control2 = TXD_CTL2_UTIL;
974 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
975 			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
976 			txd->txd_control1 |= TXD_CTL1_LSO;
977 		}
978 
979 		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
980 			txd->txd_control2 |= TXD_CTL2_CIPv4;
981 		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
982 			txd->txd_control2 |= TXD_CTL2_CTCP;
983 		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
984 			txd->txd_control2 |= TXD_CTL2_CUDP;
985 		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;
986 
987 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
988 		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
989 
990 		par = sc->sc_txdp[nexttx];
991 		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
992 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
993 			lcr |= TXDL_SFF;
994 		TXP_WCSR(TXDL_PAR, par);
995 		TXP_WCSR(TXDL_LCR, lcr);
996 
997 		if (ifp->if_bpf)
998 			bpf_ops->bpf_mtap(ifp->if_bpf, m);
999 
1000 		sc->sc_nexttx = NEXTTX(nexttx);
1001 	}
1002 }
1003 
1004 /*
1005  * Allocate DMA memory for transmit descriptor fragments.
1006  * Only one map is used for all descriptors.
1007  */
1008 int
1009 xge_alloc_txmem(struct xge_softc *sc)
1010 {
1011 	struct txd *txp;
1012 	bus_dma_segment_t seg;
1013 	bus_addr_t txdp;
1014 	void *kva;
1015 	int i, rseg, state;
1016 
1017 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
1018 	state = 0;
1019 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
1020 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1021 		goto err;
1022 	state++;
1023 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
1024 	    BUS_DMA_NOWAIT))
1025 		goto err;
1026 
1027 	state++;
1028 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
1029 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
1030 		goto err;
1031 	state++;
1032 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
1033 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
1034 		goto err;
1035 
1036 	/* setup transmit array pointers */
1037 	txp = (struct txd *)kva;
1038 	txdp = seg.ds_addr;
1039 	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
1040 		sc->sc_txd[i] = txp;
1041 		sc->sc_txdp[i] = txdp;
1042 		txp += NTXFRAGS;
1043 		txdp += (NTXFRAGS * sizeof(struct txd));
1044 	}
1045 
1046 	return 0;
1047 
1048 err:
1049 	if (state > 2)
1050 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1051 	if (state > 1)
1052 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1053 	if (state > 0)
1054 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1055 	return ENOBUFS;
1056 }
1057 
1058 /*
1059  * Allocate DMA memory for receive descriptor,
1060  * only one map is used for all descriptors.
1061  * link receive descriptor pages together.
1062  */
1063 int
1064 xge_alloc_rxmem(struct xge_softc *sc)
1065 {
1066 	struct rxd_4k *rxpp;
1067 	bus_dma_segment_t seg;
1068 	void *kva;
1069 	int i, rseg, state;
1070 
1071 	/* sanity check */
1072 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
1073 		printf("bad compiler struct alignment, %d != %d\n",
1074 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
1075 		return EINVAL;
1076 	}
1077 
1078 	state = 0;
1079 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
1080 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1081 		goto err;
1082 	state++;
1083 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
1084 	    BUS_DMA_NOWAIT))
1085 		goto err;
1086 
1087 	state++;
1088 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
1089 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
1090 		goto err;
1091 	state++;
1092 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
1093 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
1094 		goto err;
1095 
1096 	/* setup receive page link pointers */
1097 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
1098 		sc->sc_rxd_4k[i] = rxpp;
1099 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
1100 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
1101 	}
1102 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
1103 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
1104 
1105 	return 0;
1106 
1107 err:
1108 	if (state > 2)
1109 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1110 	if (state > 1)
1111 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1112 	if (state > 0)
1113 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1114 	return ENOBUFS;
1115 }
1116 
1117 
1118 /*
1119  * Add a new mbuf chain to descriptor id.
1120  */
1121 int
1122 xge_add_rxbuf(struct xge_softc *sc, int id)
1123 {
1124 	struct rxdesc *rxd;
1125 	struct mbuf *m[5];
1126 	int page, desc, error;
1127 #if RX_MODE == RX_MODE_5
1128 	int i;
1129 #endif
1130 
1131 	page = id/NDESC_BUFMODE;
1132 	desc = id%NDESC_BUFMODE;
1133 
1134 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
1135 
1136 	/*
1137 	 * Allocate mbufs.
1138 	 * Currently five mbufs and two clusters are used,
1139 	 * the hardware will put (ethernet, ip, tcp/udp) headers in
1140 	 * their own buffer and the clusters are only used for data.
1141 	 */
1142 #if RX_MODE == RX_MODE_1
1143 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1144 	if (m[0] == NULL)
1145 		return ENOBUFS;
1146 	MCLGET(m[0], M_DONTWAIT);
1147 	if ((m[0]->m_flags & M_EXT) == 0) {
1148 		m_freem(m[0]);
1149 		return ENOBUFS;
1150 	}
1151 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
1152 #elif RX_MODE == RX_MODE_3
1153 #error missing rxmode 3.
1154 #elif RX_MODE == RX_MODE_5
1155 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1156 	for (i = 1; i < 5; i++) {
1157 		MGET(m[i], M_DONTWAIT, MT_DATA);
1158 	}
1159 	if (m[3])
1160 		MCLGET(m[3], M_DONTWAIT);
1161 	if (m[4])
1162 		MCLGET(m[4], M_DONTWAIT);
1163 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
1164 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
1165 		/* Out of something */
1166 		for (i = 0; i < 5; i++)
1167 			if (m[i] != NULL)
1168 				m_free(m[i]);
1169 		return ENOBUFS;
1170 	}
1171 	/* Link'em together */
1172 	m[0]->m_next = m[1];
1173 	m[1]->m_next = m[2];
1174 	m[2]->m_next = m[3];
1175 	m[3]->m_next = m[4];
1176 #else
1177 #error bad mode RX_MODE
1178 #endif
1179 
1180 	if (sc->sc_rxb[id])
1181 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
1182 	sc->sc_rxb[id] = m[0];
1183 
1184 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
1185 	    BUS_DMA_READ|BUS_DMA_NOWAIT);
1186 	if (error)
1187 		return error;
1188 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
1189 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
1190 
1191 #if RX_MODE == RX_MODE_1
1192 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
1193 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1194 	rxd->rxd_control1 = RXD_CTL1_OWN;
1195 #elif RX_MODE == RX_MODE_3
1196 #elif RX_MODE == RX_MODE_5
1197 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
1198 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
1199 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1200 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
1201 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
1202 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
1203 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
1204 	rxd->rxd_control1 = RXD_CTL1_OWN;
1205 #endif
1206 
1207 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1208 	return 0;
1209 }
1210 
1211 /*
1212  * These magics comes from the FreeBSD driver.
1213  */
1214 int
1215 xge_setup_xgxs(struct xge_softc *sc)
1216 {
1217 	/* The magic numbers are described in the users guide */
1218 
1219 	/* Writing to MDIO 0x8000 (Global Config 0) */
1220 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1221 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
1222 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
1223 
1224 	/* Writing to MDIO 0x8000 (Global Config 1) */
1225 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1226 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1227 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
1228 
1229 	/* Reset the Gigablaze */
1230 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1231 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
1232 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
1233 
1234 	/* read the pole settings */
1235 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1236 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
1237 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
1238 
1239 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1240 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1241 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
1242 
1243 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1244 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
1245 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
1246 
1247 	/* Workaround for TX Lane XAUI initialization error.
1248 	   Read Xpak PHY register 24 for XAUI lane status */
1249 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
1250 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1251 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
1252 
1253 	/*
1254 	 * Reading the MDIO control with value 0x1804001c0F001c
1255 	 * means the TxLanes were already in sync
1256 	 * Reading the MDIO control with value 0x1804000c0x001c
1257 	 * means some TxLanes are not in sync where x is a 4-bit
1258 	 * value representing each lanes
1259 	 */
1260 #if 0
1261 	val = PIF_RCSR(MDIO_CONTROL);
1262 	if (val != 0x1804001c0F001cULL) {
1263 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
1264 		    XNAME, val, 0x1804001c0F001cULL);
1265 		return 1;
1266 	}
1267 #endif
1268 
1269 	/* Set and remove the DTE XS INTLoopBackN */
1270 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
1271 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
1272 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
1273 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
1274 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
1275 
1276 #if 0
1277 	/* Reading the DTX control register Should be 0x5152040001c */
1278 	val = PIF_RCSR(DTX_CONTROL);
1279 	if (val != 0x5152040001cULL) {
1280 		printf("%s: DTX_CONTROL: %llx != %llx\n",
1281 		    XNAME, val, 0x5152040001cULL);
1282 		return 1;
1283 	}
1284 #endif
1285 
1286 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
1287 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1288 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
1289 
1290 #if 0
1291 	/* Reading the MIOD control should be 0x1804001c0f001c */
1292 	val = PIF_RCSR(MDIO_CONTROL);
1293 	if (val != 0x1804001c0f001cULL) {
1294 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
1295 		    XNAME, val, 0x1804001c0f001cULL);
1296 		return 1;
1297 	}
1298 #endif
1299 	return 0;
1300 }
1301