xref: /netbsd-src/sys/dev/pci/if_xge.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*      $NetBSD: if_xge.c,v 1.10 2008/12/16 22:35:33 christos Exp $ */
2 
3 /*
4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
5  * All rights reserved.
6  *
7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      SUNET, Swedish University Computer Network.
21  * 4. The name of SUNET may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 /*
38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
39  *
40  * TODO (in no specific order):
41  *	HW VLAN support.
42  *	IPv6 HW cksum.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.10 2008/12/16 22:35:33 christos Exp $");
47 
48 #include "bpfilter.h"
49 #include "rnd.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/mbuf.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/socket.h>
57 #include <sys/device.h>
58 
59 #if NRND > 0
60 #include <sys/rnd.h>
61 #endif
62 
63 #include <net/if.h>
64 #include <net/if_dl.h>
65 #include <net/if_media.h>
66 #include <net/if_ether.h>
67 
68 #if NBPFILTER > 0
69 #include <net/bpf.h>
70 #endif
71 
72 #include <sys/bus.h>
73 #include <sys/intr.h>
74 #include <machine/endian.h>
75 
76 #include <dev/mii/mii.h>
77 #include <dev/mii/miivar.h>
78 
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcidevs.h>
82 
83 #include <sys/proc.h>
84 
85 #include <dev/pci/if_xgereg.h>
86 
87 /*
88  * Some tunable constants, tune with care!
89  */
90 #define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
91 #define NRXDESCS	1016	   /* # of receive descriptors (requested) */
92 #define NTXDESCS	8192	   /* Number of transmit descriptors */
93 #define NTXFRAGS	100	   /* Max fragments per packet */
94 #define XGE_EVENT_COUNTERS	   /* Instrumentation */
95 
96 /*
97  * Receive buffer modes; 1, 3 or 5 buffers.
98  */
99 #define RX_MODE_1 1
100 #define RX_MODE_3 3
101 #define RX_MODE_5 5
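/*
 * The mode number is the number of buffers the NIC fills per receive
 * descriptor (it also becomes NRXFRAGS below).  With RX_MODE_1 a single
 * cluster mbuf takes the whole frame; in 5-buffer mode the chip puts the
 * (ethernet, ip, tcp/udp) headers in their own small mbufs and only the
 * payload in clusters (see xge_add_rxbuf()).  3-buffer mode is not
 * implemented in this driver.
 */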
102 
103 /*
104  * Use clever macros to avoid a bunch of #ifdef's.
105  */
106 #define XCONCAT3(x,y,z) x ## y ## z
107 #define CONCAT3(x,y,z) XCONCAT3(x,y,z)
108 #define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
109 #define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
110 #define rxdesc ___CONCAT(rxd,RX_MODE)
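/*
 * For example, with the default RX_MODE of RX_MODE_1 these expand to
 * NDESC_1BUFMODE, struct rxd1_4k and struct rxd1 (presumably defined in
 * if_xgereg.h), so the rest of the driver can use one set of names
 * regardless of the buffer mode chosen above.
 */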
111 
112 #define NEXTTX(x)	(((x)+1) % NTXDESCS)
113 #define NRXFRAGS	RX_MODE /* hardware imposed frags */
114 #define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
115 #define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
116 #define RXMAPSZ		(NRXPAGES*PAGE_SIZE)
117 
118 #ifdef XGE_EVENT_COUNTERS
119 #define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
120 #else
121 #define XGE_EVCNT_INCR(ev)	/* nothing */
122 #endif
123 
124 /*
125  * Magic values to fix a bug where the MAC address can't be read correctly.
126  * They come from the Linux driver.
127  */
128 static uint64_t fix_mac[] = {
129 	0x0060000000000000ULL, 0x0060600000000000ULL,
130 	0x0040600000000000ULL, 0x0000600000000000ULL,
131 	0x0020600000000000ULL, 0x0060600000000000ULL,
132 	0x0020600000000000ULL, 0x0060600000000000ULL,
133 	0x0020600000000000ULL, 0x0060600000000000ULL,
134 	0x0020600000000000ULL, 0x0060600000000000ULL,
135 	0x0020600000000000ULL, 0x0060600000000000ULL,
136 	0x0020600000000000ULL, 0x0060600000000000ULL,
137 	0x0020600000000000ULL, 0x0060600000000000ULL,
138 	0x0020600000000000ULL, 0x0060600000000000ULL,
139 	0x0020600000000000ULL, 0x0060600000000000ULL,
140 	0x0020600000000000ULL, 0x0060600000000000ULL,
141 	0x0020600000000000ULL, 0x0000600000000000ULL,
142 	0x0040600000000000ULL, 0x0060600000000000ULL,
143 };
144 
145 
146 struct xge_softc {
147 	struct device sc_dev;
148 	struct ethercom sc_ethercom;
149 #define sc_if sc_ethercom.ec_if
150 	bus_dma_tag_t sc_dmat;
151 	bus_space_tag_t sc_st;
152 	bus_space_handle_t sc_sh;
153 	bus_space_tag_t sc_txt;
154 	bus_space_handle_t sc_txh;
155 	void *sc_ih;
156 
157 	struct ifmedia xena_media;
158 	pcireg_t sc_pciregs[16];
159 
160 	/* Transmit structures */
161 	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
162 	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
163 	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
164 	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
165 	int sc_nexttx, sc_lasttx;
166 	bus_dmamap_t sc_txmap;		/* transmit descriptor map */
167 
168 	/* Receive data */
169 	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
170 	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
171 	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
172 	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
173 	int sc_nextrx;			/* next descriptor to check */
174 
175 #ifdef XGE_EVENT_COUNTERS
176 	struct evcnt sc_intr;	/* # of interrupts */
177 	struct evcnt sc_txintr;	/* # of transmit interrupts */
178 	struct evcnt sc_rxintr;	/* # of receive interrupts */
179 	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
180 #endif
181 };
182 
183 static int xge_match(struct device *parent, struct cfdata *cf, void *aux);
184 static void xge_attach(struct device *parent, struct device *self, void *aux);
185 static int xge_alloc_txmem(struct xge_softc *);
186 static int xge_alloc_rxmem(struct xge_softc *);
187 static void xge_start(struct ifnet *);
188 static void xge_stop(struct ifnet *, int);
189 static int xge_add_rxbuf(struct xge_softc *, int);
190 static void xge_mcast_filter(struct xge_softc *sc);
191 static int xge_setup_xgxs(struct xge_softc *sc);
192 static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
193 static int xge_init(struct ifnet *ifp);
194 static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
195 static int xge_xgmii_mediachange(struct ifnet *);
196 static int xge_intr(void  *);
197 
198 /*
199  * Helpers to address registers.
200  */
201 #define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
202 #define PIF_RCSR(csr)		pif_rcsr(sc, csr)
203 #define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
204 #define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
205 
206 static inline void
207 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
208 {
209 	uint32_t lval, hval;
210 
211 	lval = val&0xffffffff;
212 	hval = val>>32;
213 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
214 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
215 }
216 
217 static inline uint64_t
218 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
219 {
220 	uint64_t val, val2;
221 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
222 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
223 	val |= (val2 << 32);
224 	return val;
225 }
226 
227 static inline void
228 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
229 {
230 	uint32_t lval, hval;
231 
232 	lval = val&0xffffffff;
233 	hval = val>>32;
234 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
235 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
236 }
237 
238 
239 static inline void
240 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
241 {
242 	uint32_t lval, hval;
243 
244 	lval = val&0xffffffff;
245 	hval = val>>32;
246 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
247 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
248 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
249 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
250 }
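
/*
 * Note that the 64-bit registers are accessed as two 32-bit bus_space
 * operations, low word first.  pif_wkey() rewrites RMAC_CFG_KEY with the
 * unlock value before each 32-bit half, presumably because the key only
 * arms the next write to the protected register.
 */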
251 
252 
253 CFATTACH_DECL(xge, sizeof(struct xge_softc),
254     xge_match, xge_attach, NULL, NULL);
255 
256 #define XNAME device_xname(&sc->sc_dev)
257 
258 #define XGE_RXSYNC(desc, what) \
259 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
260 	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
261 	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
262 #define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
263 	r4_rxd[desc%NDESC_BUFMODE]
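/*
 * The receive descriptors are laid out in 4k pages of NDESC_BUFMODE
 * descriptors each, all covered by the single sc_rxmap DMA map:
 * XGE_RXD() picks page desc/NDESC_BUFMODE, slot desc%NDESC_BUFMODE,
 * and XGE_RXSYNC() syncs just that one descriptor.
 */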
264 
265 /*
266  * Non-tunable constants.
267  */
268 #define XGE_MAX_MTU		9600
269 #define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
270 
271 static int
272 xge_match(struct device *parent, struct cfdata *cf, void *aux)
273 {
274 	struct pci_attach_args *pa = aux;
275 
276 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
277 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
278 		return (1);
279 
280 	return (0);
281 }
282 
283 void
284 xge_attach(struct device *parent, struct device *self, void *aux)
285 {
286 	struct pci_attach_args *pa = aux;
287 	struct xge_softc *sc;
288 	struct ifnet *ifp;
289 	pcireg_t memtype;
290 	pci_intr_handle_t ih;
291 	const char *intrstr = NULL;
292 	pci_chipset_tag_t pc = pa->pa_pc;
293 	uint8_t enaddr[ETHER_ADDR_LEN];
294 	uint64_t val;
295 	int i;
296 
297 	sc = (struct xge_softc *)self;
298 
299 	sc->sc_dmat = pa->pa_dmat;
300 
301 	/* Get BAR0 address */
302 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
303 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
304 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
305 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
306 		return;
307 	}
308 
309 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
310 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
311 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
312 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
313 		return;
314 	}
315 
316 	/* Save PCI config space */
317 	for (i = 0; i < 64; i += 4)
318 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
319 
320 #if BYTE_ORDER == LITTLE_ENDIAN
321 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
322 	val &= ~(TxF_R_SE|RxF_W_SE);
323 	PIF_WCSR(SWAPPER_CTRL, val);
324 	PIF_WCSR(SWAPPER_CTRL, val);
325 #elif BYTE_ORDER == BIG_ENDIAN
326 	/* do nothing */
327 #else
328 #error bad endianness!
329 #endif
330 
331 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
332 		return printf("%s: failed configuring endian, %llx != %llx!\n",
333 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
334 
335 	/*
336 	 * The MAC addr may be all FF's, which is not good.
337 	 * Resolve it by writing some magic values to GPIO_CONTROL and
338 	 * forcing a chip reset so the serial EEPROM is read in again.
339 	 */
340 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
341 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
342 		PIF_RCSR(GPIO_CONTROL);
343 	}
344 
345 	/*
346 	 * Reset the chip and restore the PCI registers.
347 	 */
348 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
349 	DELAY(500000);
350 	for (i = 0; i < 64; i += 4)
351 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
352 
353 	/*
354 	 * Restore the byte order registers.
355 	 */
356 #if BYTE_ORDER == LITTLE_ENDIAN
357 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
358 	val &= ~(TxF_R_SE|RxF_W_SE);
359 	PIF_WCSR(SWAPPER_CTRL, val);
360 	PIF_WCSR(SWAPPER_CTRL, val);
361 #elif BYTE_ORDER == BIG_ENDIAN
362 	/* do nothing */
363 #else
364 #error bad endianness!
365 #endif
366 
367 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
368 		return printf("%s: failed configuring endian2, %llx != %llx!\n",
369 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
370 
371 	/*
372 	 * XGXS initialization.
373 	 */
374 	/* 29, reset */
375 	PIF_WCSR(SW_RESET, 0);
376 	DELAY(500000);
377 
378 	/* 30, configure XGXS transceiver */
379 	xge_setup_xgxs(sc);
380 
381 	/* 33, program MAC address (not needed here) */
382 	/* Get ethernet address */
383 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
384 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
385 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
386 		;
387 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
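	/*
	 * The station address occupies the six most significant bytes of
	 * the 64-bit RMAC_ADDR_DATA0_MEM value, hence the shift by
	 * (56 - 8*i) below.
	 */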
388 	for (i = 0; i < ETHER_ADDR_LEN; i++)
389 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
390 
391 	/*
392 	 * Get memory for transmit descriptor lists.
393 	 */
394 	if (xge_alloc_txmem(sc))
395 		return printf("%s: failed allocating txmem.\n", XNAME);
396 
397 	/* 9 and 10 - set FIFO number/prio */
398 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
399 	PIF_WCSR(TX_FIFO_P1, 0ULL);
400 	PIF_WCSR(TX_FIFO_P2, 0ULL);
401 	PIF_WCSR(TX_FIFO_P3, 0ULL);
402 
403 	/* 11, XXX set round-robin prio? */
404 
405 	/* 12, enable transmit FIFO */
406 	val = PIF_RCSR(TX_FIFO_P0);
407 	val |= TX_FIFO_ENABLE;
408 	PIF_WCSR(TX_FIFO_P0, val);
409 
410 	/* 13, disable some error checks */
411 	PIF_WCSR(TX_PA_CFG,
412 	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);
413 
414 	/*
415 	 * Create transmit DMA maps.
416 	 * Make them large for TSO.
417 	 */
418 	for (i = 0; i < NTXDESCS; i++) {
419 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
420 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i]))
421 			return printf("%s: cannot create TX DMA maps\n", XNAME);
422 	}
423 
424 	sc->sc_lasttx = NTXDESCS-1;
425 
426 	/*
427 	 * RxDMA initialization.
428 	 * Only use one out of 8 possible receive queues.
429 	 */
430 	if (xge_alloc_rxmem(sc))	/* allocate rx descriptor memory */
431 		return printf("%s: failed allocating rxmem\n", XNAME);
432 
433 	/* Create receive buffer DMA maps */
434 	for (i = 0; i < NRXREAL; i++) {
435 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
436 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i]))
437 			return printf("%s: cannot create RX DMA maps\n", XNAME);
438 	}
439 
440 	/* allocate mbufs to receive descriptors */
441 	for (i = 0; i < NRXREAL; i++)
442 		if (xge_add_rxbuf(sc, i))
443 			panic("out of mbufs too early");
444 
445 	/* 14, setup receive ring priority */
446 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
447 
448 	/* 15, setup receive ring round-robin calendar */
449 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
450 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
451 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
452 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
453 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
454 
455 	/* 16, write receive ring start address */
456 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
457 	/* PRC_RXD0_[1-7] are not used */
458 
459 	/* 17, Setup alarm registers */
460 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
461 
462 	/* 18, init receive ring controller */
463 #if RX_MODE == RX_MODE_1
464 	val = RING_MODE_1;
465 #elif RX_MODE == RX_MODE_3
466 	val = RING_MODE_3;
467 #else /* RX_MODE == RX_MODE_5 */
468 	val = RING_MODE_5;
469 #endif
470 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
471 	/* leave 1-7 disabled */
472 	/* XXXX snoop configuration? */
473 
474 	/* 19, set chip memory assigned to the queue */
475 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
476 
477 	/* 20, setup RLDRAM parameters */
478 	/* do not touch it for now */
479 
480 	/* 21, setup pause frame thresholds */
481 	/* do not touch the defaults */
482 	/* XXX - must 0xff be written as stated in the manual? */
483 
484 	/* 22, configure RED */
485 	/* we do not want to drop packets, so ignore */
486 
487 	/* 23, initiate RLDRAM */
488 	val = PIF_RCSR(MC_RLDRAM_MRS);
489 	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
490 	PIF_WCSR(MC_RLDRAM_MRS, val);
491 	DELAY(1000);
492 
493 	/*
494 	 * Setup interrupt policies.
495 	 */
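	/*
	 * The TTI/RTI memories appear to program interrupt moderation:
	 * a timer value plus utilization ranges (URNG) and frame-count
	 * thresholds (UFC) that together decide when a traffic interrupt
	 * is raised.
	 */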
496 	/* 40, Transmit interrupts */
497 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
498 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
499 	PIF_WCSR(TTI_DATA2_MEM,
500 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
501 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
502 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
503 		;
504 
505 	/* 41, Receive interrupts */
506 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
507 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
508 	PIF_WCSR(RTI_DATA2_MEM,
509 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
510 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
511 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
512 		;
513 
514 	/*
515 	 * Setup media stuff.
516 	 */
517 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
518 	    xge_ifmedia_status);
519 	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
520 	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);
521 
522 	aprint_normal("%s: Ethernet address %s\n", XNAME,
523 	    ether_sprintf(enaddr));
524 
525 	ifp = &sc->sc_ethercom.ec_if;
526 	strlcpy(ifp->if_xname, device_xname(&sc->sc_dev), IFNAMSIZ);
527 	ifp->if_baudrate = 10000000000LL;
528 	ifp->if_init = xge_init;
529 	ifp->if_stop = xge_stop;
530 	ifp->if_softc = sc;
531 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
532 	ifp->if_ioctl = xge_ioctl;
533 	ifp->if_start = xge_start;
534 	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
535 	IFQ_SET_READY(&ifp->if_snd);
536 
537 	/*
538 	 * Offloading capabilities.
539 	 */
540 	sc->sc_ethercom.ec_capabilities |=
541 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
542 	ifp->if_capabilities |=
543 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
544 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
545 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
546 
547 	/*
548 	 * Attach the interface.
549 	 */
550 	if_attach(ifp);
551 	ether_ifattach(ifp, enaddr);
552 
553 	/*
554 	 * Setup interrupt vector before initializing.
555 	 */
556 	if (pci_intr_map(pa, &ih))
557 		return aprint_error_dev(&sc->sc_dev, "unable to map interrupt\n");
558 	intrstr = pci_intr_string(pc, ih);
559 	if ((sc->sc_ih =
560 	    pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL)
561 		return aprint_error_dev(&sc->sc_dev, "unable to establish interrupt at %s\n",
562 		    intrstr ? intrstr : "<unknown>");
563 	aprint_normal_dev(&sc->sc_dev, "interrupting at %s\n", intrstr);
564 
565 #ifdef XGE_EVENT_COUNTERS
566 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
567 	    NULL, XNAME, "intr");
568 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
569 	    NULL, XNAME, "txintr");
570 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
571 	    NULL, XNAME, "rxintr");
572 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
573 	    NULL, XNAME, "txqe");
574 #endif
575 }
576 
577 void
578 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
579 {
580 	struct xge_softc *sc = ifp->if_softc;
581 	uint64_t reg;
582 
583 	ifmr->ifm_status = IFM_AVALID;
584 	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;
585 
586 	reg = PIF_RCSR(ADAPTER_STATUS);
587 	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
588 		ifmr->ifm_status |= IFM_ACTIVE;
589 }
590 
591 int
592 xge_xgmii_mediachange(struct ifnet *ifp)
593 {
594 	return 0;
595 }
596 
597 static void
598 xge_enable(struct xge_softc *sc)
599 {
600 	uint64_t val;
601 
602 	/* 2, enable adapter */
603 	val = PIF_RCSR(ADAPTER_CONTROL);
604 	val |= ADAPTER_EN;
605 	PIF_WCSR(ADAPTER_CONTROL, val);
606 
607 	/* 3, light the card enable led */
608 	val = PIF_RCSR(ADAPTER_CONTROL);
609 	val |= LED_ON;
610 	PIF_WCSR(ADAPTER_CONTROL, val);
611 	printf("%s: link up\n", XNAME);
612 
613 }
614 
615 int
616 xge_init(struct ifnet *ifp)
617 {
618 	struct xge_softc *sc = ifp->if_softc;
619 	uint64_t val;
620 
621 	if (ifp->if_flags & IFF_RUNNING)
622 		return 0;
623 
624 	/* 31+32, setup MAC config */
625 	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
626 	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);
627 
628 	DELAY(1000);
629 
630 	/* 54, ensure that the adapter is 'quiescent' */
631 	val = PIF_RCSR(ADAPTER_STATUS);
632 	if ((val & QUIESCENT) != QUIESCENT) {
633 		char buf[200];
634 		printf("%s: adapter not quiescent, aborting\n", XNAME);
635 		val = (val & QUIESCENT) ^ QUIESCENT;
636 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
637 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
638 		return 1;
639 	}
640 
641 	/* 56, enable the transmit laser */
642 	val = PIF_RCSR(ADAPTER_CONTROL);
643 	val |= EOI_TX_ON;
644 	PIF_WCSR(ADAPTER_CONTROL, val);
645 
646 	xge_enable(sc);
647 	/*
648 	 * Enable all interrupts
649 	 */
650 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
651 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
652 	PIF_WCSR(GENERAL_INT_MASK, 0);
653 	PIF_WCSR(TXPIC_INT_MASK, 0);
654 	PIF_WCSR(RXPIC_INT_MASK, 0);
655 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
656 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
657 
658 
659 	/* Done... */
660 	ifp->if_flags |= IFF_RUNNING;
661 	ifp->if_flags &= ~IFF_OACTIVE;
662 
663 	return 0;
664 }
665 
666 static void
667 xge_stop(struct ifnet *ifp, int disable)
668 {
669 	struct xge_softc *sc = ifp->if_softc;
670 	uint64_t val;
671 
672 	val = PIF_RCSR(ADAPTER_CONTROL);
673 	val &= ~ADAPTER_EN;
674 	PIF_WCSR(ADAPTER_CONTROL, val);
675 
676 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
677 		;
678 }
679 
680 int
681 xge_intr(void *pv)
682 {
683 	struct xge_softc *sc = pv;
684 	struct txd *txd;
685 	struct ifnet *ifp = &sc->sc_if;
686 	bus_dmamap_t dmp;
687 	uint64_t val;
688 	int i, lasttx, plen;
689 
690 	val = PIF_RCSR(GENERAL_INT_STATUS);
691 	if (val == 0)
692 		return 0; /* no interrupt here */
693 
694 	XGE_EVCNT_INCR(&sc->sc_intr);
695 
696 	PIF_WCSR(GENERAL_INT_STATUS, val);
697 
698 	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
699 		/* Wait for quiescence */
700 		printf("%s: link down\n", XNAME);
701 		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
702 			;
703 		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);
704 
705 		val = PIF_RCSR(ADAPTER_STATUS);
706 		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
707 			xge_enable(sc); /* Only if link restored */
708 	}
709 
710 	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
711 		XGE_EVCNT_INCR(&sc->sc_txintr);
712 		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
713 	}
714 	/*
715 	 * Collect sent packets.
716 	 */
717 	lasttx = sc->sc_lasttx;
718 	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
719 		txd = sc->sc_txd[i];
720 		dmp = sc->sc_txm[i];
721 
722 		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
723 		    dmp->dm_mapsize,
724 		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
725 
726 		if (txd->txd_control1 & TXD_CTL1_OWN) {
727 			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
728 			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
729 			break;
730 		}
731 		bus_dmamap_unload(sc->sc_dmat, dmp);
732 		m_freem(sc->sc_txb[i]);
733 		ifp->if_opackets++;
734 		sc->sc_lasttx = i;
735 	}
736 	if (i == sc->sc_nexttx) {
737 		XGE_EVCNT_INCR(&sc->sc_txqe);
738 	}
739 
740 	if (sc->sc_lasttx != lasttx)
741 		ifp->if_flags &= ~IFF_OACTIVE;
742 
743 	xge_start(ifp); /* Try to get more packets on the wire */
744 
745 	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
746 		XGE_EVCNT_INCR(&sc->sc_rxintr);
747 		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
748 	}
749 
750 	for (;;) {
751 		struct rxdesc *rxd;
752 		struct mbuf *m;
753 
754 		XGE_RXSYNC(sc->sc_nextrx,
755 		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
756 
757 		rxd = XGE_RXD(sc->sc_nextrx);
758 		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
759 			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
760 			break;
761 		}
762 
763 		/* got a packet */
764 		m = sc->sc_rxb[sc->sc_nextrx];
765 #if RX_MODE == RX_MODE_1
766 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
767 #elif RX_MODE == RX_MODE_3
768 #error Fix rxmodes in xge_intr
769 #elif RX_MODE == RX_MODE_5
770 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
771 		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
772 		plen += m->m_next->m_next->m_len =
773 		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
774 		plen += m->m_next->m_next->m_next->m_len =
775 		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
776 		plen += m->m_next->m_next->m_next->m_next->m_len =
777 		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
778 #endif
779 		m->m_pkthdr.rcvif = ifp;
780 		m->m_pkthdr.len = plen;
781 
782 		val = rxd->rxd_control1;
783 
784 		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
785 			/* Failed, recycle this mbuf */
786 #if RX_MODE == RX_MODE_1
787 			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
788 			rxd->rxd_control1 = RXD_CTL1_OWN;
789 #elif RX_MODE == RX_MODE_3
790 #elif RX_MODE == RX_MODE_5
791 #endif
792 			XGE_RXSYNC(sc->sc_nextrx,
793 			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
794 			ifp->if_ierrors++;
795 			break;
796 		}
797 
798 		ifp->if_ipackets++;
799 
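		/*
		 * The descriptor carries the hardware-computed L3/L4
		 * checksum results; a value of 0xffff appears to mean the
		 * checksum verified correctly, so anything else gets the
		 * corresponding M_CSUM_*_BAD flag.
		 */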
800 		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
801 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
802 			if (RXD_CTL1_L3CSUM(val) != 0xffff)
803 				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
804 		}
805 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
806 			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
807 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
808 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
809 		}
810 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
811 			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
812 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
813 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
814 		}
815 
816 #if NBPFILTER > 0
817 		if (ifp->if_bpf)
818 			bpf_mtap(ifp->if_bpf, m);
819 #endif /* NBPFILTER > 0 */
820 
821 		(*ifp->if_input)(ifp, m);
822 
823 		if (++sc->sc_nextrx == NRXREAL)
824 			sc->sc_nextrx = 0;
825 
826 	}
827 
828 	return 0;
829 }
830 
831 int
832 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
833 {
834 	struct xge_softc *sc = ifp->if_softc;
835 	struct ifreq *ifr = (struct ifreq *) data;
836 	int s, error = 0;
837 
838 	s = splnet();
839 
840 	switch (cmd) {
841 	case SIOCSIFMTU:
842 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
843 			error = EINVAL;
844 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET){
845 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
846 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
847 			error = 0;
848 		}
849 		break;
850 
851 	case SIOCGIFMEDIA:
852 	case SIOCSIFMEDIA:
853 		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
854 		break;
855 
856 	default:
857 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
858 			break;
859 
860 		error = 0;
861 
862 		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
863 			;
864 		else if (ifp->if_flags & IFF_RUNNING) {
865 			/* Change multicast list */
866 			xge_mcast_filter(sc);
867 		}
868 		break;
869 	}
870 
871 	splx(s);
872 	return(error);
873 }
874 
875 void
876 xge_mcast_filter(struct xge_softc *sc)
877 {
878 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
879 	struct ethercom *ec = &sc->sc_ethercom;
880 	struct ether_multi *enm;
881 	struct ether_multistep step;
882 	int i, numaddr = 1; /* first slot used for card unicast address */
883 	uint64_t val;
884 
885 	ETHER_FIRST_MULTI(step, ec, enm);
886 	while (enm != NULL) {
887 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
888 			/* Skip ranges */
889 			goto allmulti;
890 		}
891 		if (numaddr == MAX_MCAST_ADDR)
892 			goto allmulti;
893 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
894 			val <<= 8;
895 			val |= enm->enm_addrlo[i];
896 		}
897 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
898 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
899 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
900 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
901 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
902 			;
903 		numaddr++;
904 		ETHER_NEXT_MULTI(step, enm);
905 	}
906 	/* set the remaining entries to the broadcast address */
907 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
908 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
909 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
910 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
911 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
912 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
913 			;
914 	}
915 	ifp->if_flags &= ~IFF_ALLMULTI;
916 	return;
917 
918 allmulti:
919 	/* Just receive everything with the multicast bit set */
920 	ifp->if_flags |= IFF_ALLMULTI;
921 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
922 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
923 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
924 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
925 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
926 		;
927 }
928 
929 void
930 xge_start(struct ifnet *ifp)
931 {
932 	struct xge_softc *sc = ifp->if_softc;
933 	struct txd *txd = NULL; /* XXX - gcc */
934 	bus_dmamap_t dmp;
935 	struct	mbuf *m;
936 	uint64_t par, lcr;
937 	int nexttx = 0, ntxd, error, i;
938 
939 	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
940 		return;
941 
942 	par = lcr = 0;
943 	for (;;) {
944 		IFQ_POLL(&ifp->if_snd, m);
945 		if (m == NULL)
946 			break;	/* out of packets */
947 
948 		if (sc->sc_nexttx == sc->sc_lasttx)
949 			break;	/* No more space */
950 
951 		nexttx = sc->sc_nexttx;
952 		dmp = sc->sc_txm[nexttx];
953 
954 		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
955 		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
956 			printf("%s: bus_dmamap_load_mbuf error %d\n",
957 			    XNAME, error);
958 			break;
959 		}
960 		IFQ_DEQUEUE(&ifp->if_snd, m);
961 
962 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
963 		    BUS_DMASYNC_PREWRITE);
964 
965 		txd = sc->sc_txd[nexttx];
966 		sc->sc_txb[nexttx] = m;
967 		for (i = 0; i < dmp->dm_nsegs; i++) {
968 			if (dmp->dm_segs[i].ds_len == 0)
969 				continue;
970 			txd->txd_control1 = dmp->dm_segs[i].ds_len;
971 			txd->txd_control2 = 0;
972 			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
973 			txd++;
974 		}
975 		ntxd = txd - sc->sc_txd[nexttx] - 1;
976 		txd = sc->sc_txd[nexttx];
977 		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
978 		txd->txd_control2 = TXD_CTL2_UTIL;
979 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
980 			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
981 			txd->txd_control1 |= TXD_CTL1_LSO;
982 		}
983 
984 		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
985 			txd->txd_control2 |= TXD_CTL2_CIPv4;
986 		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
987 			txd->txd_control2 |= TXD_CTL2_CTCP;
988 		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
989 			txd->txd_control2 |= TXD_CTL2_CUDP;
990 		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;
991 
992 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
993 		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
994 
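		/*
		 * Writing the list pointer (bus address of the first
		 * descriptor) and the list control word (descriptor count
		 * plus first/last flags) through the TXP BAR is what hands
		 * the frame to the hardware.
		 */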
995 		par = sc->sc_txdp[nexttx];
996 		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
997 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
998 			lcr |= TXDL_SFF;
999 		TXP_WCSR(TXDL_PAR, par);
1000 		TXP_WCSR(TXDL_LCR, lcr);
1001 
1002 #if NBPFILTER > 0
1003 		if (ifp->if_bpf)
1004 			bpf_mtap(ifp->if_bpf, m);
1005 #endif /* NBPFILTER > 0 */
1006 
1007 		sc->sc_nexttx = NEXTTX(nexttx);
1008 	}
1009 }
1010 
1011 /*
1012  * Allocate DMA memory for transmit descriptor fragments.
1013  * Only one map is used for all descriptors.
1014  */
1015 int
1016 xge_alloc_txmem(struct xge_softc *sc)
1017 {
1018 	struct txd *txp;
1019 	bus_dma_segment_t seg;
1020 	bus_addr_t txdp;
1021 	void *kva;
1022 	int i, rseg, state;
1023 
1024 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
1025 	state = 0;
1026 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
1027 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1028 		goto err;
1029 	state++;
1030 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
1031 	    BUS_DMA_NOWAIT))
1032 		goto err;
1033 
1034 	state++;
1035 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
1036 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
1037 		goto err;
1038 	state++;
1039 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
1040 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
1041 		goto err;
1042 
1043 	/* setup transmit array pointers */
1044 	txp = (struct txd *)kva;
1045 	txdp = seg.ds_addr;
1046 	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
1047 		sc->sc_txd[i] = txp;
1048 		sc->sc_txdp[i] = txdp;
1049 		txp += NTXFRAGS;
1050 		txdp += (NTXFRAGS * sizeof(struct txd));
1051 	}
1052 
1053 	return 0;
1054 
1055 err:
1056 	if (state > 2)
1057 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1058 	if (state > 1)
1059 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1060 	if (state > 0)
1061 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1062 	return ENOBUFS;
1063 }
1064 
1065 /*
1066  * Allocate DMA memory for the receive descriptors;
1067  * only one map is used for all descriptors.
1068  * Link the receive descriptor pages together.
1069  */
1070 int
1071 xge_alloc_rxmem(struct xge_softc *sc)
1072 {
1073 	struct rxd_4k *rxpp;
1074 	bus_dma_segment_t seg;
1075 	void *kva;
1076 	int i, rseg, state;
1077 
1078 	/* sanity check */
1079 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
1080 		printf("bad compiler struct alignment, %d != %d\n",
1081 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
1082 		return EINVAL;
1083 	}
1084 
1085 	state = 0;
1086 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
1087 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1088 		goto err;
1089 	state++;
1090 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
1091 	    BUS_DMA_NOWAIT))
1092 		goto err;
1093 
1094 	state++;
1095 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
1096 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
1097 		goto err;
1098 	state++;
1099 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
1100 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
1101 		goto err;
1102 
1103 	/* setup receive page link pointers */
1104 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
1105 		sc->sc_rxd_4k[i] = rxpp;
1106 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
1107 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
1108 	}
1109 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
1110 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
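	/*
	 * The pages are now chained through their r4_next pointers, with
	 * the last page pointing back to the first, so the receive
	 * descriptors form one continuous ring.
	 */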
1111 
1112 	return 0;
1113 
1114 err:
1115 	if (state > 2)
1116 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
1117 	if (state > 1)
1118 		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
1119 	if (state > 0)
1120 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1121 	return ENOBUFS;
1122 }
1123 
1124 
1125 /*
1126  * Add a new mbuf chain to descriptor id.
1127  */
1128 int
1129 xge_add_rxbuf(struct xge_softc *sc, int id)
1130 {
1131 	struct rxdesc *rxd;
1132 	struct mbuf *m[5];
1133 	int page, desc, error;
1134 #if RX_MODE == RX_MODE_5
1135 	int i;
1136 #endif
1137 
1138 	page = id/NDESC_BUFMODE;
1139 	desc = id%NDESC_BUFMODE;
1140 
1141 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
1142 
1143 	/*
1144 	 * Allocate mbufs.
1145 	 * In 5-buffer mode, five mbufs and two clusters are used:
1146 	 * the hardware puts the (ethernet, ip, tcp/udp) headers in
1147 	 * their own buffers and the clusters hold only the payload.
1148 	 */
1149 #if RX_MODE == RX_MODE_1
1150 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1151 	if (m[0] == NULL)
1152 		return ENOBUFS;
1153 	MCLGET(m[0], M_DONTWAIT);
1154 	if ((m[0]->m_flags & M_EXT) == 0) {
1155 		m_freem(m[0]);
1156 		return ENOBUFS;
1157 	}
1158 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
1159 #elif RX_MODE == RX_MODE_3
1160 #error missing rxmode 3.
1161 #elif RX_MODE == RX_MODE_5
1162 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1163 	for (i = 1; i < 5; i++) {
1164 		MGET(m[i], M_DONTWAIT, MT_DATA);
1165 	}
1166 	if (m[3])
1167 		MCLGET(m[3], M_DONTWAIT);
1168 	if (m[4])
1169 		MCLGET(m[4], M_DONTWAIT);
1170 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
1171 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
1172 		/* Out of something */
1173 		for (i = 0; i < 5; i++)
1174 			if (m[i] != NULL)
1175 				m_free(m[i]);
1176 		return ENOBUFS;
1177 	}
1178 	/* Link'em together */
1179 	m[0]->m_next = m[1];
1180 	m[1]->m_next = m[2];
1181 	m[2]->m_next = m[3];
1182 	m[3]->m_next = m[4];
1183 #else
1184 #error bad mode RX_MODE
1185 #endif
1186 
1187 	if (sc->sc_rxb[id])
1188 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
1189 	sc->sc_rxb[id] = m[0];
1190 
1191 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
1192 	    BUS_DMA_READ|BUS_DMA_NOWAIT);
1193 	if (error)
1194 		return error;
1195 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
1196 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
1197 
1198 #if RX_MODE == RX_MODE_1
1199 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
1200 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1201 	rxd->rxd_control1 = RXD_CTL1_OWN;
1202 #elif RX_MODE == RX_MODE_3
1203 #elif RX_MODE == RX_MODE_5
1204 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
1205 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
1206 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1207 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
1208 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
1209 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
1210 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
1211 	rxd->rxd_control1 = RXD_CTL1_OWN;
1212 #endif
1213 
1214 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1215 	return 0;
1216 }
1217 
1218 /*
1219  * These magic values come from the FreeBSD driver.
1220  */
1221 int
1222 xge_setup_xgxs(struct xge_softc *sc)
1223 {
1224 	/* The magic numbers are described in the users guide */
1225 	/* The magic numbers are described in the user's guide */
1226 	/* Writing to MDIO 0x8000 (Global Config 0) */
1227 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1228 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
1229 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
1230 
1231 	/* Writing to MDIO 0x8000 (Global Config 1) */
1232 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1233 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1234 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
1235 
1236 	/* Reset the Gigablaze */
1237 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1238 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
1239 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
1240 
1241 	/* read the pole settings */
1242 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1243 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
1244 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
1245 
1246 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1247 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1248 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
1249 
1250 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1251 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
1252 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
1253 
1254 	/* Workaround for TX Lane XAUI initialization error.
1255 	   Read Xpak PHY register 24 for XAUI lane status */
1256 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
1257 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1258 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
1259 
1260 	/*
1261 	 * Reading the MDIO control with value 0x1804001c0F001c
1262 	 * means the TxLanes were already in sync.
1263 	 * Reading the MDIO control with value 0x1804000c0x001c
1264 	 * means some TxLanes are not in sync, where x is a 4-bit
1265 	 * value with one bit per lane.
1266 	 */
1267 #if 0
1268 	val = PIF_RCSR(MDIO_CONTROL);
1269 	if (val != 0x1804001c0F001cULL) {
1270 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
1271 		    XNAME, val, 0x1804001c0F001cULL);
1272 		return 1;
1273 	}
1274 #endif
1275 
1276 	/* Set and remove the DTE XS INTLoopBackN */
1277 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
1278 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
1279 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
1280 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
1281 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
1282 
1283 #if 0
1284 	/* Reading the DTX control register should be 0x5152040001c */
1285 	val = PIF_RCSR(DTX_CONTROL);
1286 	if (val != 0x5152040001cULL) {
1287 		printf("%s: DTX_CONTROL: %llx != %llx\n",
1288 		    XNAME, val, 0x5152040001cULL);
1289 		return 1;
1290 	}
1291 #endif
1292 
1293 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
1294 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1295 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
1296 
1297 #if 0
1298 	/* Reading the MDIO control should be 0x1804001c0f001c */
1299 	val = PIF_RCSR(MDIO_CONTROL);
1300 	if (val != 0x1804001c0f001cULL) {
1301 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
1302 		    XNAME, val, 0x1804001c0f001cULL);
1303 		return 1;
1304 	}
1305 #endif
1306 	return 0;
1307 }
1308