/*      $NetBSD: if_xge.c,v 1.16 2012/02/02 19:43:06 tls Exp $ */

/*
 * Copyright (c) 2004, SUNET, Swedish University Computer Network.
 * All rights reserved.
 *
 * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      SUNET, Swedish University Computer Network.
 * 4. The name of SUNET may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
 *
 * TODO (in no specific order):
 *	HW VLAN support.
 *	IPv6 HW cksum.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.16 2012/02/02 19:43:06 tls Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/device.h>

#include <sys/rnd.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#include <sys/bus.h>
#include <sys/intr.h>
#include <machine/endian.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>

#include <sys/proc.h>

#include <dev/pci/if_xgereg.h>

/*
 * Some tunable constants, tune with care!
 */
#define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
#define NRXDESCS	1016	   /* # of receive descriptors (requested) */
#define NTXDESCS	8192	   /* Number of transmit descriptors */
#define NTXFRAGS	100	   /* Max fragments per packet */
#define XGE_EVENT_COUNTERS	   /* Instrumentation */

/*
 * Receive buffer modes; 1, 3 or 5 buffers.
 */
#define RX_MODE_1 1
#define RX_MODE_3 3
#define RX_MODE_5 5
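
/*
 * In 1-buffer mode each frame lands in a single buffer; in the 3- and
 * 5-buffer modes the chip stores the (ethernet, ip, tcp/udp) headers
 * and the payload in separate buffers (see xge_add_rxbuf() below).
 */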

/*
 * Use clever macros to avoid a bunch of #ifdef's.
 */
#define XCONCAT3(x,y,z) x ## y ## z
#define CONCAT3(x,y,z) XCONCAT3(x,y,z)
#define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
#define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
#define rxdesc ___CONCAT(rxd,RX_MODE)
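
/*
 * With the default RX_MODE of RX_MODE_1 the token pasting above yields
 * NDESC_BUFMODE == NDESC_1BUFMODE, rxd_4k == rxd1_4k and rxdesc == rxd1,
 * presumably matching the per-mode definitions in if_xgereg.h.
 */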

#define NEXTTX(x)	(((x)+1) % NTXDESCS)
#define NRXFRAGS	RX_MODE /* hardware imposed frags */
#define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
#define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
#define RXMAPSZ		(NRXPAGES*PAGE_SIZE)
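
/*
 * The receive descriptors are laid out NDESC_BUFMODE to a page, so
 * NRXREAL (the count actually allocated) is NRXDESCS rounded up to a
 * whole number of pages, and RXMAPSZ is the size of the single DMA map
 * that covers them all.
 */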

#ifdef XGE_EVENT_COUNTERS
#define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
#else
#define XGE_EVCNT_INCR(ev)	/* nothing */
#endif

/*
 * Magic values used to work around the case where the MAC address
 * cannot be read correctly.  They come from the Linux driver.
 */
static uint64_t fix_mac[] = {
	0x0060000000000000ULL, 0x0060600000000000ULL,
	0x0040600000000000ULL, 0x0000600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0000600000000000ULL,
	0x0040600000000000ULL, 0x0060600000000000ULL,
};

struct xge_softc {
	struct device sc_dev;
	struct ethercom sc_ethercom;
#define sc_if sc_ethercom.ec_if
	bus_dma_tag_t sc_dmat;
	bus_space_tag_t sc_st;
	bus_space_handle_t sc_sh;
	bus_space_tag_t sc_txt;
	bus_space_handle_t sc_txh;
	void *sc_ih;

	struct ifmedia xena_media;
	pcireg_t sc_pciregs[16];

	/* Transmit structures */
	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
	int sc_nexttx, sc_lasttx;
	bus_dmamap_t sc_txmap;		/* transmit descriptor map */

	/* Receive data */
	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
	int sc_nextrx;			/* next descriptor to check */

#ifdef XGE_EVENT_COUNTERS
	struct evcnt sc_intr;	/* # of interrupts */
	struct evcnt sc_txintr;	/* # of transmit interrupts */
	struct evcnt sc_rxintr;	/* # of receive interrupts */
	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
#endif
};

static int xge_match(device_t parent, cfdata_t cf, void *aux);
static void xge_attach(device_t parent, device_t self, void *aux);
static int xge_alloc_txmem(struct xge_softc *);
static int xge_alloc_rxmem(struct xge_softc *);
static void xge_start(struct ifnet *);
static void xge_stop(struct ifnet *, int);
static int xge_add_rxbuf(struct xge_softc *, int);
static void xge_mcast_filter(struct xge_softc *sc);
static int xge_setup_xgxs(struct xge_softc *sc);
static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
static int xge_init(struct ifnet *ifp);
static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
static int xge_xgmii_mediachange(struct ifnet *);
static int xge_intr(void *);

/*
 * Helpers to address registers.
 */
#define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
#define PIF_RCSR(csr)		pif_rcsr(sc, csr)
#define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
#define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
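
/*
 * The chip's registers are 64 bits wide but are accessed as two 32-bit
 * halves: the low word at the register offset, the high word at
 * offset + 4.  As a minimal sketch, PIF_WCSR(SWAPPER_CTRL, val) below
 * amounts to:
 *
 *	bus_space_write_4(sc->sc_st, sc->sc_sh, SWAPPER_CTRL, val & 0xffffffff);
 *	bus_space_write_4(sc->sc_st, sc->sc_sh, SWAPPER_CTRL + 4, val >> 32);
 */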

static inline void
pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}

static inline uint64_t
pif_rcsr(struct xge_softc *sc, bus_size_t csr)
{
	uint64_t val, val2;

	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
	val |= (val2 << 32);
	return val;
}

static inline void
txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
}

static inline void
pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}
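
/*
 * Writes to the key-protected RMAC registers must be preceded by
 * writing RMAC_KEY_VALUE to RMAC_CFG_KEY; pif_wkey() re-arms the key
 * before each 32-bit half, which suggests a write consumes it (an
 * assumption drawn from the unlock-write, unlock-write sequence above).
 */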

CFATTACH_DECL(xge, sizeof(struct xge_softc),
    xge_match, xge_attach, NULL, NULL);

#define XNAME device_xname(&sc->sc_dev)

#define XGE_RXSYNC(desc, what) \
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
#define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
	r4_rxd[desc%NDESC_BUFMODE]
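
/*
 * Receive descriptors are stored NDESC_BUFMODE to a 4k page: descriptor
 * `desc' lives on page desc / NDESC_BUFMODE at slot desc % NDESC_BUFMODE,
 * and XGE_RXSYNC syncs just that descriptor's slice of the single
 * receive descriptor map.
 */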

/*
 * Non-tunable constants.
 */
#define XGE_MAX_MTU		9600
#define XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */

static int
xge_match(device_t parent, cfdata_t cf, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
		return (1);

	return (0);
}

void
xge_attach(device_t parent, device_t self, void *aux)
{
	struct pci_attach_args *pa = aux;
	struct xge_softc *sc;
	struct ifnet *ifp;
	pcireg_t memtype;
	pci_intr_handle_t ih;
	const char *intrstr = NULL;
	pci_chipset_tag_t pc = pa->pa_pc;
	uint8_t enaddr[ETHER_ADDR_LEN];
	uint64_t val;
	int i;

	sc = device_private(self);

	sc->sc_dmat = pa->pa_dmat;

	/* Get BAR0 address */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
		return;
	}

	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
		return;
	}

	/* Save PCI config space */
	for (i = 0; i < 64; i += 4)
		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);

#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
		printf("%s: failed configuring endian, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
		return;
	}

	/*
	 * The MAC addr may be all FF's, which is not good.
	 * Resolve it by writing some magics to GPIO_CONTROL and
	 * forcing a chip reset so that the serial EEPROM is read again.
	 */
	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
		PIF_RCSR(GPIO_CONTROL);
	}

	/*
	 * Reset the chip and restore the PCI registers.
	 */
	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
	DELAY(500000);
	for (i = 0; i < 64; i += 4)
		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);

	/*
	 * Restore the byte order registers.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
		printf("%s: failed configuring endian2, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
		return;
	}

	/*
	 * XGXS initialization.
	 */
	/* 29, reset */
	PIF_WCSR(SW_RESET, 0);
	DELAY(500000);

	/* 30, configure XGXS transceiver */
	xge_setup_xgxs(sc);

	/* 33, program MAC address (not needed here) */
	/* Get ethernet address */
	PIF_WCSR(RMAC_ADDR_CMD_MEM,
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
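
	/*
	 * The station address occupies the top 48 bits of the register,
	 * most significant byte first (byte i is bits 56-8*i); e.g. a
	 * value of 0x0011223344550000 yields 00:11:22:33:44:55.
	 */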
	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
	for (i = 0; i < ETHER_ADDR_LEN; i++)
		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));

	/*
	 * Get memory for transmit descriptor lists.
	 */
	if (xge_alloc_txmem(sc)) {
		printf("%s: failed allocating txmem.\n", XNAME);
		return;
	}

	/* 9 and 10 - set FIFO number/prio */
	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
	PIF_WCSR(TX_FIFO_P1, 0ULL);
	PIF_WCSR(TX_FIFO_P2, 0ULL);
	PIF_WCSR(TX_FIFO_P3, 0ULL);

	/* 11, XXX set round-robin prio? */

	/* 12, enable transmit FIFO */
	val = PIF_RCSR(TX_FIFO_P0);
	val |= TX_FIFO_ENABLE;
	PIF_WCSR(TX_FIFO_P0, val);

	/* 13, disable some error checks */
	PIF_WCSR(TX_PA_CFG,
	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);

	/*
	 * Create transmit DMA maps.
	 * Make them large for TSO.
	 */
	for (i = 0; i < NTXDESCS; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i])) {
			printf("%s: cannot create TX DMA maps\n", XNAME);
			return;
		}
	}

	sc->sc_lasttx = NTXDESCS-1;
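
	/*
	 * Transmit ring bookkeeping: sc_nexttx (zeroed with the softc) is
	 * the next free slot and sc_lasttx trails the oldest uncollected
	 * one, so the initial state NEXTTX(sc_lasttx) == sc_nexttx marks
	 * an empty ring.
	 */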

	/*
	 * RxDMA initialization.
	 * Only use one out of 8 possible receive queues.
	 */
	/* allocate rx descriptor memory */
	if (xge_alloc_rxmem(sc)) {
		printf("%s: failed allocating rxmem\n", XNAME);
		return;
	}

	/* Create receive buffer DMA maps */
	for (i = 0; i < NRXREAL; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i])) {
			printf("%s: cannot create RX DMA maps\n", XNAME);
			return;
		}
	}

	/* allocate mbufs to receive descriptors */
	for (i = 0; i < NRXREAL; i++)
		if (xge_add_rxbuf(sc, i))
			panic("out of mbufs too early");

	/* 14, setup receive ring priority */
	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */

	/* 15, setup receive ring round-robin calendar */
	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);

	/* 16, write receive ring start address */
	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
	/* PRC_RXD0_[1-7] are not used */

	/* 17, Setup alarm registers */
	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */

	/* 18, init receive ring controller */
#if RX_MODE == RX_MODE_1
	val = RING_MODE_1;
#elif RX_MODE == RX_MODE_3
	val = RING_MODE_3;
#else /* RX_MODE == RX_MODE_5 */
	val = RING_MODE_5;
#endif
	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
	/* leave 1-7 disabled */
	/* XXXX snoop configuration? */

	/* 19, set chip memory assigned to the queue */
	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */

	/* 20, setup RLDRAM parameters */
	/* do not touch it for now */

	/* 21, setup pause frame thresholds */
	/* do not touch the defaults */
	/* XXX - must 0xff be written as stated in the manual? */

	/* 22, configure RED */
	/* we do not want to drop packets, so ignore */

	/* 23, initiate RLDRAM */
	val = PIF_RCSR(MC_RLDRAM_MRS);
	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
	PIF_WCSR(MC_RLDRAM_MRS, val);
	DELAY(1000);

	/*
	 * Setup interrupt policies.
	 */
	/* 40, Transmit interrupts */
	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
	PIF_WCSR(TTI_DATA2_MEM,
	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
		;

	/* 41, Receive interrupts */
	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
	PIF_WCSR(RTI_DATA2_MEM,
	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
		;
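
	/*
	 * The TTI/RTI blocks moderate interrupts: TIMER_VAL sets a
	 * coalescing timer and UFC_A..D give frame-count thresholds tied
	 * to the utilization ranges URNG_A..C (going by the register
	 * naming; the values above are untuned defaults).
	 */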

	/*
	 * Setup media stuff.
	 */
	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
	    xge_ifmedia_status);
	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);

	aprint_normal("%s: Ethernet address %s\n", XNAME,
	    ether_sprintf(enaddr));

	ifp = &sc->sc_ethercom.ec_if;
	strlcpy(ifp->if_xname, device_xname(&sc->sc_dev), IFNAMSIZ);
	ifp->if_baudrate = 10000000000LL;
	ifp->if_init = xge_init;
	ifp->if_stop = xge_stop;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xge_ioctl;
	ifp->if_start = xge_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Offloading capabilities.
	 */
	sc->sc_ethercom.ec_capabilities |=
	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
	ifp->if_capabilities |=
	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;

	/*
	 * Attach the interface.
	 */
	if_attach(ifp);
	ether_ifattach(ifp, enaddr);

	/*
	 * Setup interrupt vector before initializing.
	 */
	if (pci_intr_map(pa, &ih)) {
		aprint_error_dev(&sc->sc_dev, "unable to map interrupt\n");
		return;
	}
	intrstr = pci_intr_string(pc, ih);
	if ((sc->sc_ih =
	    pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL) {
		aprint_error_dev(&sc->sc_dev,
		    "unable to establish interrupt at %s\n",
		    intrstr ? intrstr : "<unknown>");
		return;
	}
	aprint_normal_dev(&sc->sc_dev, "interrupting at %s\n", intrstr);

#ifdef XGE_EVENT_COUNTERS
	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "intr");
	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txintr");
	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "rxintr");
	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txqe");
#endif
}

void
xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t reg;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;

	reg = PIF_RCSR(ADAPTER_STATUS);
	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
		ifmr->ifm_status |= IFM_ACTIVE;
}

int
xge_xgmii_mediachange(struct ifnet *ifp)
{
	return 0;
}

static void
xge_enable(struct xge_softc *sc)
{
	uint64_t val;

	/* 2, enable adapter */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	/* 3, light the card enable led */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= LED_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);
	printf("%s: link up\n", XNAME);
}

int
xge_init(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	if (ifp->if_flags & IFF_RUNNING)
		return 0;

	/* 31+32, setup MAC config */
	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);

	DELAY(1000);

	/* 54, ensure that the adapter is 'quiescent' */
	val = PIF_RCSR(ADAPTER_STATUS);
	if ((val & QUIESCENT) != QUIESCENT) {
		char buf[200];

		printf("%s: adapter not quiescent, aborting\n", XNAME);
		val = (val & QUIESCENT) ^ QUIESCENT;
		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
		return 1;
	}

	/* 56, enable the transmit laser */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= EOI_TX_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);

	xge_enable(sc);

	/*
	 * Enable all interrupts
	 */
	PIF_WCSR(TX_TRAFFIC_MASK, 0);
	PIF_WCSR(RX_TRAFFIC_MASK, 0);
	PIF_WCSR(GENERAL_INT_MASK, 0);
	PIF_WCSR(TXPIC_INT_MASK, 0);
	PIF_WCSR(RXPIC_INT_MASK, 0);
	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);

	/* Done... */
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	return 0;
}

static void
xge_stop(struct ifnet *ifp, int disable)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	val = PIF_RCSR(ADAPTER_CONTROL);
	val &= ~ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
		;
}

int
xge_intr(void *pv)
{
	struct xge_softc *sc = pv;
	struct txd *txd;
	struct ifnet *ifp = &sc->sc_if;
	bus_dmamap_t dmp;
	uint64_t val;
	int i, lasttx, plen;

	val = PIF_RCSR(GENERAL_INT_STATUS);
	if (val == 0)
		return 0; /* no interrupt here */

	XGE_EVCNT_INCR(&sc->sc_intr);

	PIF_WCSR(GENERAL_INT_STATUS, val);

	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
		/* Wait for quiescence */
		printf("%s: link down\n", XNAME);
		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
			;
		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);

		val = PIF_RCSR(ADAPTER_STATUS);
		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
			xge_enable(sc); /* Only if link restored */
	}

	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_txintr);
		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
	}
	/*
	 * Collect sent packets.  The chip hands a descriptor list back
	 * by clearing TXD_CTL1_OWN in its first descriptor; stop at the
	 * first list the hardware still owns.
	 */
	lasttx = sc->sc_lasttx;
	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
		txd = sc->sc_txd[i];
		dmp = sc->sc_txm[i];

		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
		    dmp->dm_mapsize,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		if (txd->txd_control1 & TXD_CTL1_OWN) {
			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
			break;
		}
		bus_dmamap_unload(sc->sc_dmat, dmp);
		m_freem(sc->sc_txb[i]);
		ifp->if_opackets++;
		sc->sc_lasttx = i;
	}
	if (i == sc->sc_nexttx) {
		XGE_EVCNT_INCR(&sc->sc_txqe);
	}

	if (sc->sc_lasttx != lasttx)
		ifp->if_flags &= ~IFF_OACTIVE;

	xge_start(ifp); /* Try to get more packets on the wire */

	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_rxintr);
		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
	}

	for (;;) {
		struct rxdesc *rxd;
		struct mbuf *m;

		XGE_RXSYNC(sc->sc_nextrx,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		rxd = XGE_RXD(sc->sc_nextrx);
		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
			break;
		}

		/* got a packet */
		m = sc->sc_rxb[sc->sc_nextrx];
#if RX_MODE == RX_MODE_1
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
#elif RX_MODE == RX_MODE_3
#error Fix rxmodes in xge_intr
#elif RX_MODE == RX_MODE_5
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_len =
		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
		plen += m->m_next->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
#endif
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.len = plen;

		val = rxd->rxd_control1;

		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
			/* Failed, recycle this mbuf */
#if RX_MODE == RX_MODE_1
			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
			rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
#endif
			XGE_RXSYNC(sc->sc_nextrx,
			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
			ifp->if_ierrors++;
			break;
		}

		ifp->if_ipackets++;

		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
			if (RXD_CTL1_L3CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}

		bpf_mtap(ifp, m);

		(*ifp->if_input)(ifp, m);

		if (++sc->sc_nextrx == NRXREAL)
			sc->sc_nextrx = 0;
	}

	return 0;
}

int
xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct xge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
			error = EINVAL;
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) {
			PIF_WCSR(RMAC_MAX_PYLD_LEN,
			    RMAC_PYLD_LEN(ifr->ifr_mtu));
			error = 0;
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
		break;

	default:
		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
			break;

		error = 0;

		if ((cmd == SIOCADDMULTI || cmd == SIOCDELMULTI) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/* Change multicast list */
			xge_mcast_filter(sc);
		}
		break;
	}

	splx(s);
	return (error);
}

void
xge_mcast_filter(struct xge_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multi *enm;
	struct ether_multistep step;
	int i, numaddr = 1; /* first slot used for card unicast address */
	uint64_t val;

	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
			/* Skip ranges */
			goto allmulti;
		}
		if (numaddr == MAX_MCAST_ADDR)
			goto allmulti;
		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
			val <<= 8;
			val |= enm->enm_addrlo[i];
		}
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
		numaddr++;
		ETHER_NEXT_MULTI(step, enm);
	}
	/* set the remaining entries to the broadcast address */
	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
	}
	ifp->if_flags &= ~IFF_ALLMULTI;
	return;

allmulti:
	/* Just receive everything with the multicast bit set */
	ifp->if_flags |= IFF_ALLMULTI;
	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
}

void
xge_start(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	struct txd *txd = NULL; /* XXX - gcc */
	bus_dmamap_t dmp;
	struct mbuf *m;
	uint64_t par, lcr;
	int nexttx = 0, ntxd, error, i;

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	par = lcr = 0;
	for (;;) {
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;	/* out of packets */

		if (sc->sc_nexttx == sc->sc_lasttx)
			break;	/* No more space */

		nexttx = sc->sc_nexttx;
		dmp = sc->sc_txm[nexttx];

		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
			printf("%s: bus_dmamap_load_mbuf error %d\n",
			    XNAME, error);
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		txd = sc->sc_txd[nexttx];
		sc->sc_txb[nexttx] = m;
		for (i = 0; i < dmp->dm_nsegs; i++) {
			if (dmp->dm_segs[i].ds_len == 0)
				continue;
			txd->txd_control1 = dmp->dm_segs[i].ds_len;
			txd->txd_control2 = 0;
			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
			txd++;
		}
		ntxd = txd - sc->sc_txd[nexttx] - 1;
		txd = sc->sc_txd[nexttx];
		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
		txd->txd_control2 = TXD_CTL2_UTIL;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
			txd->txd_control1 |= TXD_CTL1_LSO;
		}

		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
			txd->txd_control2 |= TXD_CTL2_CIPv4;
		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
			txd->txd_control2 |= TXD_CTL2_CTCP;
		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
			txd->txd_control2 |= TXD_CTL2_CUDP;
		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
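
		/*
		 * Hand the list to the hardware: the bus address of the
		 * first descriptor goes into TXDL_PAR and the descriptor
		 * count plus first/last flags into TXDL_LCR; the TXDL_LCR
		 * write is what submits the list for transmission.
		 */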
		par = sc->sc_txdp[nexttx];
		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
			lcr |= TXDL_SFF;
		TXP_WCSR(TXDL_PAR, par);
		TXP_WCSR(TXDL_LCR, lcr);

		bpf_mtap(ifp, m);

		sc->sc_nexttx = NEXTTX(nexttx);
	}
}

/*
 * Allocate DMA memory for transmit descriptor fragments.
 * Only one map is used for all descriptors.
 */
int
xge_alloc_txmem(struct xge_softc *sc)
{
	struct txd *txp;
	bus_dma_segment_t seg;
	bus_addr_t txdp;
	void *kva;
	int i, rseg, state;

#define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_txmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup transmit array pointers */
	txdp = seg.ds_addr;
	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
		sc->sc_txd[i] = txp;
		sc->sc_txdp[i] = txdp;
		txp += NTXFRAGS;
		txdp += (NTXFRAGS * sizeof(struct txd));
	}

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Allocate DMA memory for the receive descriptors;
 * only one map is used for all descriptors.
 * Link the receive descriptor pages together.
 */
int
xge_alloc_rxmem(struct xge_softc *sc)
{
	struct rxd_4k *rxpp;
	bus_dma_segment_t seg;
	void *kva;
	int i, rseg, state;

	/* sanity check */
	if (sizeof(struct rxd_4k) != XGE_PAGE) {
		printf("bad compiler struct alignment, %d != %d\n",
		    (int)sizeof(struct rxd_4k), XGE_PAGE);
		return EINVAL;
	}

	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup receive page link pointers */
	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
		sc->sc_rxd_4k[i] = rxpp;
		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
	}
	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
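	/*
	 * Each page's r4_next now holds the bus address of the following
	 * page, and the last page points back to the first, so the
	 * descriptor pages form one circular list the chip can follow.
	 */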

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Add a new mbuf chain to descriptor id.
 */
int
xge_add_rxbuf(struct xge_softc *sc, int id)
{
	struct rxdesc *rxd;
	struct mbuf *m[5];
	int page, desc, error;
#if RX_MODE == RX_MODE_5
	int i;
#endif

	page = id/NDESC_BUFMODE;
	desc = id%NDESC_BUFMODE;

	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];

	/*
	 * Allocate mbufs.
	 * Currently five mbufs and two clusters are used,
	 * the hardware will put (ethernet, ip, tcp/udp) headers in
	 * their own buffer and the clusters are only used for data.
	 */
#if RX_MODE == RX_MODE_1
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	if (m[0] == NULL)
		return ENOBUFS;
	MCLGET(m[0], M_DONTWAIT);
	if ((m[0]->m_flags & M_EXT) == 0) {
		m_freem(m[0]);
		return ENOBUFS;
	}
	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
#elif RX_MODE == RX_MODE_3
#error missing rxmode 3.
#elif RX_MODE == RX_MODE_5
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	for (i = 1; i < 5; i++) {
		MGET(m[i], M_DONTWAIT, MT_DATA);
	}
	if (m[3])
		MCLGET(m[3], M_DONTWAIT);
	if (m[4])
		MCLGET(m[4], M_DONTWAIT);
	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
		/* Out of something */
		for (i = 0; i < 5; i++)
			if (m[i] != NULL)
				m_free(m[i]);
		return ENOBUFS;
	}
	/* Link'em together */
	m[0]->m_next = m[1];
	m[1]->m_next = m[2];
	m[2]->m_next = m[3];
	m[3]->m_next = m[4];
#else
#error bad mode RX_MODE
#endif

	if (sc->sc_rxb[id])
		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
	sc->sc_rxb[id] = m[0];

	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
	    BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (error)
		return error;
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);

#if RX_MODE == RX_MODE_1
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#endif

	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
	return 0;
}

/*
 * These magic values come from the FreeBSD driver.
 */
int
xge_setup_xgxs(struct xge_softc *sc)
{
	/* The magic numbers are described in the users guide */

	/* Writing to MDIO 0x8000 (Global Config 0) */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);

	/* Writing to MDIO 0x8000 (Global Config 1) */
	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);

	/* Reset the Gigablaze */
	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);

	/* read the pole settings */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);

	/* Workaround for TX Lane XAUI initialization error.
	   Read Xpak PHY register 24 for XAUI lane status */
	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);

	/*
	 * Reading the MDIO control with value 0x1804001c0F001c
	 * means the TxLanes were already in sync.
	 * Reading it with value 0x1804000c0x001c means some TxLanes
	 * are not in sync, where x is a 4-bit value representing the
	 * individual lanes.
	 */
#if 0
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0F001cULL) {
		printf("%s: MDIO_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x1804001c0F001cULL);
		return 1;
	}
#endif

	/* Set and remove the DTE XS INTLoopBackN */
	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);

#if 0
	/* Reading the DTX control register should be 0x5152040001c */
	val = PIF_RCSR(DTX_CONTROL);
	if (val != 0x5152040001cULL) {
		printf("%s: DTX_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x5152040001cULL);
		return 1;
	}
#endif

	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);

#if 0
	/* Reading the MDIO control should be 0x1804001c0f001c */
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0f001cULL) {
		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
		    XNAME, val, 0x1804001c0f001cULL);
		return 1;
	}
#endif
	return 0;
}