/*      $NetBSD: if_xge.c,v 1.19 2014/03/29 19:28:25 christos Exp $ */

/*
 * Copyright (c) 2004, SUNET, Swedish University Computer Network.
 * All rights reserved.
 *
 * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      SUNET, Swedish University Computer Network.
 * 4. The name of SUNET may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
 *
 * TODO (in no specific order):
 *	HW VLAN support.
 *	IPv6 HW cksum.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.19 2014/03/29 19:28:25 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/device.h>

#include <sys/rnd.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#include <sys/bus.h>
#include <sys/intr.h>
#include <machine/endian.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>

#include <sys/proc.h>

#include <dev/pci/if_xgereg.h>

/*
 * Some tunable constants, tune with care!
 */
#define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
#define NRXDESCS	1016	   /* # of receive descriptors (requested) */
#define NTXDESCS	8192	   /* Number of transmit descriptors */
#define NTXFRAGS	100	   /* Max fragments per packet */
#define XGE_EVENT_COUNTERS	   /* Instrumentation */

/*
 * Receive buffer modes; 1, 3 or 5 buffers.
 */
#define RX_MODE_1 1
#define RX_MODE_3 3
#define RX_MODE_5 5
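
/*
 * RX_MODE_1 receives a whole frame into a single cluster mbuf;
 * RX_MODE_5 lets the chip split the (ethernet, ip, tcp/udp) headers
 * and the payload into separate buffers, cf. xge_add_rxbuf() below.
 * RX_MODE_3 is not implemented (see the #errors further down).
 */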

/*
 * Use clever macros to avoid a bunch of #ifdef's.
 */
#define XCONCAT3(x,y,z) x ## y ## z
#define CONCAT3(x,y,z) XCONCAT3(x,y,z)
#define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
#define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
#define rxdesc ___CONCAT(rxd,RX_MODE)
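
/*
 * For example, with the default RX_MODE == RX_MODE_1 the macros above
 * expand to NDESC_BUFMODE -> NDESC_1BUFMODE, rxd_4k -> rxd1_4k and
 * rxdesc -> rxd1, selecting the one-buffer descriptor layout from
 * if_xgereg.h at compile time.
 */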

#define NEXTTX(x)	(((x)+1) % NTXDESCS)
#define NRXFRAGS	RX_MODE /* hardware imposed frags */
#define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
#define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
#define RXMAPSZ		(NRXPAGES*PAGE_SIZE)

#ifdef XGE_EVENT_COUNTERS
#define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
#else
#define XGE_EVCNT_INCR(ev)	/* nothing */
#endif

/*
 * Magics to fix a bug when the MAC address can't be read correctly.
 * Comes from the Linux driver.
 */
static uint64_t fix_mac[] = {
	0x0060000000000000ULL, 0x0060600000000000ULL,
	0x0040600000000000ULL, 0x0000600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0000600000000000ULL,
	0x0040600000000000ULL, 0x0060600000000000ULL,
};

struct xge_softc {
	device_t sc_dev;
	struct ethercom sc_ethercom;
#define sc_if sc_ethercom.ec_if
	bus_dma_tag_t sc_dmat;
	bus_space_tag_t sc_st;
	bus_space_handle_t sc_sh;
	bus_space_tag_t sc_txt;
	bus_space_handle_t sc_txh;
	void *sc_ih;

	struct ifmedia xena_media;
	pcireg_t sc_pciregs[16];

	/* Transmit structures */
	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
	int sc_nexttx, sc_lasttx;
	bus_dmamap_t sc_txmap;		/* transmit descriptor map */

	/* Receive data */
	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
	int sc_nextrx;			/* next descriptor to check */

#ifdef XGE_EVENT_COUNTERS
	struct evcnt sc_intr;	/* # of interrupts */
	struct evcnt sc_txintr;	/* # of transmit interrupts */
	struct evcnt sc_rxintr;	/* # of receive interrupts */
	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
#endif
};

static int xge_match(device_t parent, cfdata_t cf, void *aux);
static void xge_attach(device_t parent, device_t self, void *aux);
static int xge_alloc_txmem(struct xge_softc *);
static int xge_alloc_rxmem(struct xge_softc *);
static void xge_start(struct ifnet *);
static void xge_stop(struct ifnet *, int);
static int xge_add_rxbuf(struct xge_softc *, int);
static void xge_mcast_filter(struct xge_softc *sc);
static int xge_setup_xgxs(struct xge_softc *sc);
static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
static int xge_init(struct ifnet *ifp);
static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
static int xge_xgmii_mediachange(struct ifnet *);
static int xge_intr(void *);

/*
 * Helpers to address registers.
 */
#define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
#define PIF_RCSR(csr)		pif_rcsr(sc, csr)
#define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
#define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)

static inline void
pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}

static inline uint64_t
pif_rcsr(struct xge_softc *sc, bus_size_t csr)
{
	uint64_t val, val2;

	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
	val |= (val2 << 32);
	return val;
}

static inline void
txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
}

static inline void
pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}

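/*
 * The chip's registers are 64 bits wide but are accessed as two
 * 32-bit halves, low word first.  A typical read-modify-write through
 * the helpers above looks like this (disabled illustration only):
 */
#if 0
	uint64_t val;

	val = PIF_RCSR(ADAPTER_CONTROL);	/* reads csr, then csr+4 */
	val |= ADAPTER_EN;			/* set the enable bit */
	PIF_WCSR(ADAPTER_CONTROL, val);		/* writes csr, then csr+4 */
#endif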

CFATTACH_DECL_NEW(xge, sizeof(struct xge_softc),
    xge_match, xge_attach, NULL, NULL);

#define XNAME device_xname(sc->sc_dev)

#define XGE_RXSYNC(desc, what) \
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
#define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
	r4_rxd[desc%NDESC_BUFMODE]
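
/*
 * XGE_RXD() maps a flat descriptor index onto the page layout:
 * page desc/NDESC_BUFMODE, slot desc%NDESC_BUFMODE.  XGE_RXSYNC()
 * syncs exactly that one descriptor within the sc_rxmap DMA map.
 */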

/*
 * Non-tunable constants.
 */
#define XGE_MAX_MTU		9600
#define XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */

static int
xge_match(device_t parent, cfdata_t cf, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
		return (1);

	return (0);
}

void
xge_attach(device_t parent, device_t self, void *aux)
{
	struct pci_attach_args *pa = aux;
	struct xge_softc *sc;
	struct ifnet *ifp;
	pcireg_t memtype;
	pci_intr_handle_t ih;
	const char *intrstr = NULL;
	pci_chipset_tag_t pc = pa->pa_pc;
	uint8_t enaddr[ETHER_ADDR_LEN];
	uint64_t val;
	int i;
	char intrbuf[PCI_INTRSTR_LEN];

	sc = device_private(self);
	sc->sc_dev = self;
	sc->sc_dmat = pa->pa_dmat;

	/* Get BAR0 address */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
		return;
	}

	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
		return;
	}

	/* Save PCI config space */
	for (i = 0; i < 64; i += 4)
		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);

#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
		return printf("%s: failed configuring endian, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);

	/*
	 * The MAC addr may be all FF's, which is not good.
	 * Resolve it by writing some magics to GPIO_CONTROL and
	 * force a chip reset to read in the serial eeprom again.
	 */
	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
		PIF_RCSR(GPIO_CONTROL);
	}

	/*
	 * Reset the chip and restore the PCI registers.
	 */
	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
	DELAY(500000);
	for (i = 0; i < 64; i += 4)
		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);

	/*
	 * Restore the byte order registers.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
		return printf("%s: failed configuring endian2, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);

	/*
	 * XGXS initialization.
	 */
	/* 29, reset */
	PIF_WCSR(SW_RESET, 0);
	DELAY(500000);

	/* 30, configure XGXS transceiver */
	xge_setup_xgxs(sc);

	/* 33, program MAC address (not needed here) */
	/* Get ethernet address */
	PIF_WCSR(RMAC_ADDR_CMD_MEM,
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
	for (i = 0; i < ETHER_ADDR_LEN; i++)
		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));

	/*
	 * Get memory for transmit descriptor lists.
	 */
	if (xge_alloc_txmem(sc))
		return printf("%s: failed allocating txmem.\n", XNAME);

	/* 9 and 10 - set FIFO number/prio */
	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
	PIF_WCSR(TX_FIFO_P1, 0ULL);
	PIF_WCSR(TX_FIFO_P2, 0ULL);
	PIF_WCSR(TX_FIFO_P3, 0ULL);

	/* 11, XXX set round-robin prio? */

	/* 12, enable transmit FIFO */
	val = PIF_RCSR(TX_FIFO_P0);
	val |= TX_FIFO_ENABLE;
	PIF_WCSR(TX_FIFO_P0, val);

	/* 13, disable some error checks */
	PIF_WCSR(TX_PA_CFG,
	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);

	/*
	 * Create transmit DMA maps.
	 * Make them large for TSO.
	 */
	for (i = 0; i < NTXDESCS; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i]))
			return printf("%s: cannot create TX DMA maps\n", XNAME);
	}

	sc->sc_lasttx = NTXDESCS-1;

	/*
	 * RxDMA initialization.
	 * Only use one out of 8 possible receive queues.
	 */
	if (xge_alloc_rxmem(sc))	/* allocate rx descriptor memory */
		return printf("%s: failed allocating rxmem\n", XNAME);

	/* Create receive buffer DMA maps */
	for (i = 0; i < NRXREAL; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i]))
			return printf("%s: cannot create RX DMA maps\n", XNAME);
	}

	/* allocate mbufs to receive descriptors */
	for (i = 0; i < NRXREAL; i++)
		if (xge_add_rxbuf(sc, i))
			panic("out of mbufs too early");

	/* 14, setup receive ring priority */
	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */

	/* 15, setup receive ring round-robin calendar */
	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);

	/* 16, write receive ring start address */
	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
	/* PRC_RXD0_[1-7] are not used */

	/* 17, Setup alarm registers */
	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */

	/* 18, init receive ring controller */
#if RX_MODE == RX_MODE_1
	val = RING_MODE_1;
#elif RX_MODE == RX_MODE_3
	val = RING_MODE_3;
#else /* RX_MODE == RX_MODE_5 */
	val = RING_MODE_5;
#endif
	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
	/* leave 1-7 disabled */
	/* XXXX snoop configuration? */

	/* 19, set chip memory assigned to the queue */
	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */

	/* 20, setup RLDRAM parameters */
	/* do not touch it for now */

	/* 21, setup pause frame thresholds */
	/* do not touch the defaults */
	/* XXX - must 0xff be written as stated in the manual? */

	/* 22, configure RED */
	/* we do not want to drop packets, so ignore */

	/* 23, initiate RLDRAM */
	val = PIF_RCSR(MC_RLDRAM_MRS);
	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
	PIF_WCSR(MC_RLDRAM_MRS, val);
	DELAY(1000);

	/*
	 * Setup interrupt policies.
	 */
	/* 40, Transmit interrupts */
	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
	PIF_WCSR(TTI_DATA2_MEM,
	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
		;

	/* 41, Receive interrupts */
	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
	PIF_WCSR(RTI_DATA2_MEM,
	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
		;
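
	/*
	 * The TTI/RTI memories programmed above hold interrupt
	 * mitigation parameters: a timer value (TX_TIMER_VAL/
	 * RX_TIMER_VAL) and per-utilization-range frame counts
	 * (the URNG/UFC pairs); an interrupt should be raised as
	 * soon as either threshold is reached.
	 */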

	/*
	 * Setup media stuff.
	 */
	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
	    xge_ifmedia_status);
	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);

	aprint_normal("%s: Ethernet address %s\n", XNAME,
	    ether_sprintf(enaddr));

	ifp = &sc->sc_ethercom.ec_if;
	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_baudrate = 10000000000LL;
	ifp->if_init = xge_init;
	ifp->if_stop = xge_stop;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xge_ioctl;
	ifp->if_start = xge_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Offloading capabilities.
	 */
	sc->sc_ethercom.ec_capabilities |=
	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
	ifp->if_capabilities |=
	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;

	/*
	 * Attach the interface.
	 */
	if_attach(ifp);
	ether_ifattach(ifp, enaddr);

	/*
	 * Setup interrupt vector before initializing.
	 */
	if (pci_intr_map(pa, &ih))
		return aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
	if ((sc->sc_ih =
	    pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL)
		return aprint_error_dev(sc->sc_dev, "unable to establish interrupt at %s\n",
		    intrstr ? intrstr : "<unknown>");
	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);

#ifdef XGE_EVENT_COUNTERS
	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "intr");
	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txintr");
	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "rxintr");
	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txqe");
#endif
}

void
xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t reg;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;

	reg = PIF_RCSR(ADAPTER_STATUS);
	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
		ifmr->ifm_status |= IFM_ACTIVE;
}

int
xge_xgmii_mediachange(struct ifnet *ifp)
{
	return 0;
}

static void
xge_enable(struct xge_softc *sc)
{
	uint64_t val;

	/* 2, enable adapter */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	/* 3, light the card enable led */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= LED_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);
	printf("%s: link up\n", XNAME);
}

int
xge_init(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	if (ifp->if_flags & IFF_RUNNING)
		return 0;

	/* 31+32, setup MAC config */
	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);

	DELAY(1000);

	/* 54, ensure that the adapter is 'quiescent' */
	val = PIF_RCSR(ADAPTER_STATUS);
	if ((val & QUIESCENT) != QUIESCENT) {
		char buf[200];
		printf("%s: adapter not quiescent, aborting\n", XNAME);
		val = (val & QUIESCENT) ^ QUIESCENT;
		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
		return 1;
	}

	/* 56, enable the transmit laser */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= EOI_TX_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);

	xge_enable(sc);
	/*
	 * Enable all interrupts
	 */
	PIF_WCSR(TX_TRAFFIC_MASK, 0);
	PIF_WCSR(RX_TRAFFIC_MASK, 0);
	PIF_WCSR(GENERAL_INT_MASK, 0);
	PIF_WCSR(TXPIC_INT_MASK, 0);
	PIF_WCSR(RXPIC_INT_MASK, 0);
	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);

	/* Done... */
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	return 0;
}

static void
xge_stop(struct ifnet *ifp, int disable)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	val = PIF_RCSR(ADAPTER_CONTROL);
	val &= ~ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
		;
}

int
xge_intr(void *pv)
{
	struct xge_softc *sc = pv;
	struct txd *txd;
	struct ifnet *ifp = &sc->sc_if;
	bus_dmamap_t dmp;
	uint64_t val;
	int i, lasttx, plen;

	val = PIF_RCSR(GENERAL_INT_STATUS);
	if (val == 0)
		return 0; /* no interrupt here */

	XGE_EVCNT_INCR(&sc->sc_intr);

	PIF_WCSR(GENERAL_INT_STATUS, val);

	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
		/* Wait for quiescence */
		printf("%s: link down\n", XNAME);
		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
			;
		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);

		val = PIF_RCSR(ADAPTER_STATUS);
		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
			xge_enable(sc); /* Only if link restored */
	}

	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_txintr);
		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
	}
	/*
	 * Collect sent packets: walk the ring from the last collected
	 * descriptor until one still owned by the chip is found.
	 */
	lasttx = sc->sc_lasttx;
	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
		txd = sc->sc_txd[i];
		dmp = sc->sc_txm[i];

		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
		    dmp->dm_mapsize,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		if (txd->txd_control1 & TXD_CTL1_OWN) {
			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
			break;
		}
		bus_dmamap_unload(sc->sc_dmat, dmp);
		m_freem(sc->sc_txb[i]);
		ifp->if_opackets++;
		sc->sc_lasttx = i;
	}
	if (i == sc->sc_nexttx) {
		XGE_EVCNT_INCR(&sc->sc_txqe);
	}

	if (sc->sc_lasttx != lasttx)
		ifp->if_flags &= ~IFF_OACTIVE;

	xge_start(ifp); /* Try to get more packets on the wire */

	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_rxintr);
		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
	}

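	/*
	 * Scan the receive ring from sc_nextrx; stop at the first
	 * descriptor that the chip still owns (RXD_CTL1_OWN set).
	 */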
	for (;;) {
		struct rxdesc *rxd;
		struct mbuf *m;

		XGE_RXSYNC(sc->sc_nextrx,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		rxd = XGE_RXD(sc->sc_nextrx);
		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
			break;
		}

		/* got a packet */
		m = sc->sc_rxb[sc->sc_nextrx];
#if RX_MODE == RX_MODE_1
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
#elif RX_MODE == RX_MODE_3
#error Fix rxmodes in xge_intr
#elif RX_MODE == RX_MODE_5
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_len =
		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
		plen += m->m_next->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
#endif
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.len = plen;

		val = rxd->rxd_control1;

		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
			/* Failed, recycle this mbuf */
#if RX_MODE == RX_MODE_1
			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
			rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
#endif
			XGE_RXSYNC(sc->sc_nextrx,
			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
			ifp->if_ierrors++;
			break;
		}

		ifp->if_ipackets++;

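		/*
		 * The descriptor carries the checksums computed by the
		 * chip; a value of 0xffff appears to mean "verified ok",
		 * anything else is flagged as bad to the stack.
		 */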
		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
			if (RXD_CTL1_L3CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}

		bpf_mtap(ifp, m);

		(*ifp->if_input)(ifp, m);

		if (++sc->sc_nextrx == NRXREAL)
			sc->sc_nextrx = 0;
	}

	return 0;
}

int
xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct xge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
			error = EINVAL;
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) {
			PIF_WCSR(RMAC_MAX_PYLD_LEN,
			    RMAC_PYLD_LEN(ifr->ifr_mtu));
			error = 0;
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
		break;

	default:
		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
			break;

		error = 0;

		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
			;
		else if (ifp->if_flags & IFF_RUNNING) {
			/* Change multicast list */
			xge_mcast_filter(sc);
		}
		break;
	}

	splx(s);
	return (error);
}

void
xge_mcast_filter(struct xge_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multi *enm;
	struct ether_multistep step;
	int i, numaddr = 1; /* first slot used for card unicast address */
	uint64_t val;

	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
			/* Skip ranges */
			goto allmulti;
		}
		if (numaddr == MAX_MCAST_ADDR)
			goto allmulti;
		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
			val <<= 8;
			val |= enm->enm_addrlo[i];
		}
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
		numaddr++;
		ETHER_NEXT_MULTI(step, enm);
	}
	/* set the remaining entries to the broadcast address */
	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
	}
	ifp->if_flags &= ~IFF_ALLMULTI;
	return;

allmulti:
	/* Just receive everything with the multicast bit set */
	ifp->if_flags |= IFF_ALLMULTI;
	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
}

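/*
 * Start transmission: each packet gets a private list of fragment
 * descriptors (sc_txd[nexttx]), ownership is passed to the chip via
 * TXD_CTL1_OWN, and the list is posted to the transmit FIFO by
 * writing its bus address and length to the TXDL_PAR/TXDL_LCR
 * register pair.
 */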
void
xge_start(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	struct txd *txd = NULL; /* XXX - gcc */
	bus_dmamap_t dmp;
	struct mbuf *m;
	uint64_t par, lcr;
	int nexttx = 0, ntxd, error, i;

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	par = lcr = 0;
	for (;;) {
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;	/* out of packets */

		if (sc->sc_nexttx == sc->sc_lasttx)
			break;	/* No more space */

		nexttx = sc->sc_nexttx;
		dmp = sc->sc_txm[nexttx];

		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
			printf("%s: bus_dmamap_load_mbuf error %d\n",
			    XNAME, error);
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		txd = sc->sc_txd[nexttx];
		sc->sc_txb[nexttx] = m;
		for (i = 0; i < dmp->dm_nsegs; i++) {
			if (dmp->dm_segs[i].ds_len == 0)
				continue;
			txd->txd_control1 = dmp->dm_segs[i].ds_len;
			txd->txd_control2 = 0;
			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
			txd++;
		}
		ntxd = txd - sc->sc_txd[nexttx] - 1;
		txd = sc->sc_txd[nexttx];
		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
		txd->txd_control2 = TXD_CTL2_UTIL;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
			txd->txd_control1 |= TXD_CTL1_LSO;
		}

		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
			txd->txd_control2 |= TXD_CTL2_CIPv4;
		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
			txd->txd_control2 |= TXD_CTL2_CTCP;
		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
			txd->txd_control2 |= TXD_CTL2_CUDP;
		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

		par = sc->sc_txdp[nexttx];
		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
			lcr |= TXDL_SFF;
		TXP_WCSR(TXDL_PAR, par);
		TXP_WCSR(TXDL_LCR, lcr);

		bpf_mtap(ifp, m);

		sc->sc_nexttx = NEXTTX(nexttx);
	}
}

/*
 * Allocate DMA memory for transmit descriptor fragments.
 * Only one map is used for all descriptors.
 */
int
xge_alloc_txmem(struct xge_softc *sc)
{
	struct txd *txp;
	bus_dma_segment_t seg;
	bus_addr_t txdp;
	void *kva;
	int i, rseg, state;

#define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
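/* NTXFRAGS contiguous fragment descriptors are reserved per tx slot */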
	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_txmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup transmit array pointers */
	txdp = seg.ds_addr;
	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
		sc->sc_txd[i] = txp;
		sc->sc_txdp[i] = txdp;
		txp += NTXFRAGS;
		txdp += (NTXFRAGS * sizeof(struct txd));
	}

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Allocate DMA memory for the receive descriptors;
 * only one map is used for all descriptors.
 * Link the receive descriptor pages together.
 */
int
xge_alloc_rxmem(struct xge_softc *sc)
{
	struct rxd_4k *rxpp;
	bus_dma_segment_t seg;
	void *kva;
	int i, rseg, state;

	/* sanity check */
	if (sizeof(struct rxd_4k) != XGE_PAGE) {
		printf("bad compiler struct alignment, %d != %d\n",
		    (int)sizeof(struct rxd_4k), XGE_PAGE);
		return EINVAL;
	}

	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup receive page link pointers */
	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
		sc->sc_rxd_4k[i] = rxpp;
		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
	}
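	/* close the ring: let the last page point back to the first */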
	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Add a new mbuf chain to descriptor id.
 */
int
xge_add_rxbuf(struct xge_softc *sc, int id)
{
	struct rxdesc *rxd;
	struct mbuf *m[5];
	int page, desc, error;
#if RX_MODE == RX_MODE_5
	int i;
#endif

	page = id/NDESC_BUFMODE;
	desc = id%NDESC_BUFMODE;

	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];

	/*
	 * Allocate mbufs.
	 * In RX_MODE_5, five mbufs and two clusters are used:
	 * the hardware puts the (ethernet, ip, tcp/udp) headers in
	 * their own buffers and the clusters are only used for data.
	 * In RX_MODE_1, the whole frame goes into a single cluster mbuf.
	 */
#if RX_MODE == RX_MODE_1
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	if (m[0] == NULL)
		return ENOBUFS;
	MCLGET(m[0], M_DONTWAIT);
	if ((m[0]->m_flags & M_EXT) == 0) {
		m_freem(m[0]);
		return ENOBUFS;
	}
	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
#elif RX_MODE == RX_MODE_3
#error missing rxmode 3.
#elif RX_MODE == RX_MODE_5
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	for (i = 1; i < 5; i++) {
		MGET(m[i], M_DONTWAIT, MT_DATA);
	}
	if (m[3])
		MCLGET(m[3], M_DONTWAIT);
	if (m[4])
		MCLGET(m[4], M_DONTWAIT);
	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
		/* Out of something */
		for (i = 0; i < 5; i++)
			if (m[i] != NULL)
				m_free(m[i]);
		return ENOBUFS;
	}
	/* Link'em together */
	m[0]->m_next = m[1];
	m[1]->m_next = m[2];
	m[2]->m_next = m[3];
	m[3]->m_next = m[4];
#else
#error bad mode RX_MODE
#endif

	if (sc->sc_rxb[id])
		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
	sc->sc_rxb[id] = m[0];

	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
	    BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (error)
		return error;
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);

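	/*
	 * Fill in the descriptor: buffer sizes via RXD_MKCTL2/3 and the
	 * bus address of each fragment; ownership (RXD_CTL1_OWN) is
	 * handed to the chip last.
	 */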
#if RX_MODE == RX_MODE_1
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#endif

	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
	return 0;
}

/*
 * These magics come from the FreeBSD driver.
 */
int
xge_setup_xgxs(struct xge_softc *sc)
{
	/* The magic numbers are described in the users guide */

	/* Writing to MDIO 0x8000 (Global Config 0) */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);

	/* Writing to MDIO 0x8000 (Global Config 1) */
	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);

	/* Reset the Gigablaze */
	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);

	/* read the pole settings */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);

	/* Workaround for TX Lane XAUI initialization error.
	   Read Xpak PHY register 24 for XAUI lane status */
	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);

	/*
	 * Reading the MDIO control with value 0x1804001c0F001c
	 * means the TxLanes were already in sync.
	 * Reading the MDIO control with value 0x1804000c0x001c
	 * means some TxLanes are not in sync, where x is a 4-bit
	 * value representing each lane.
	 */
#if 0
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0F001cULL) {
		printf("%s: MDIO_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x1804001c0F001cULL);
		return 1;
	}
#endif

	/* Set and remove the DTE XS INTLoopBackN */
	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);

#if 0
	/* Reading the DTX control register should be 0x5152040001c */
	val = PIF_RCSR(DTX_CONTROL);
	if (val != 0x5152040001cULL) {
		printf("%s: DTX_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x5152040001cULL);
		return 1;
	}
#endif

	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);

#if 0
	/* Reading the MDIO control should be 0x1804001c0f001c */
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0f001cULL) {
		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
		    XNAME, val, 0x1804001c0f001cULL);
		return 1;
	}
#endif
	return 0;
}
1297