xref: /netbsd-src/sys/dev/pci/if_xge.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*      $NetBSD: if_xge.c,v 1.15 2010/04/05 07:20:28 joerg Exp $ */
2 
3 /*
4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
5  * All rights reserved.
6  *
7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      SUNET, Swedish University Computer Network.
21  * 4. The name of SUNET may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 /*
38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
39  *
40  * TODO (in no specific order):
41  *	HW VLAN support.
42  *	IPv6 HW cksum.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.15 2010/04/05 07:20:28 joerg Exp $");
47 
48 #include "rnd.h"
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/mbuf.h>
53 #include <sys/malloc.h>
54 #include <sys/kernel.h>
55 #include <sys/socket.h>
56 #include <sys/device.h>
57 
58 #if NRND > 0
59 #include <sys/rnd.h>
60 #endif
61 
62 #include <net/if.h>
63 #include <net/if_dl.h>
64 #include <net/if_media.h>
65 #include <net/if_ether.h>
66 
67 #include <net/bpf.h>
68 
69 #include <sys/bus.h>
70 #include <sys/intr.h>
71 #include <machine/endian.h>
72 
73 #include <dev/mii/mii.h>
74 #include <dev/mii/miivar.h>
75 
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcidevs.h>
79 
80 #include <sys/proc.h>
81 
82 #include <dev/pci/if_xgereg.h>
83 
/*
 * Some tunable constants, tune with care!
 */
#define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
#define NRXDESCS	1016	   /* # of receive descriptors (requested) */
#define NTXDESCS	8192	   /* Number of transmit descriptors */
#define NTXFRAGS	100	   /* Max fragments per packet */
#define XGE_EVENT_COUNTERS	   /* Instrumentation */

/*
 * Receive buffer modes; 1, 3 or 5 buffers.
 */
#define RX_MODE_1 1
#define RX_MODE_3 3
#define RX_MODE_5 5

/*
 * Use clever macros to avoid a bunch of #ifdef's.
 * The selected RX_MODE is pasted into identifier names, so that e.g.
 * NDESC_BUFMODE becomes NDESC_<mode>BUFMODE and rxd_4k becomes
 * rxd<mode>_4k.  The two-level CONCAT3/XCONCAT3 pair makes sure RX_MODE
 * is expanded before the tokens are pasted together.
 */
#define XCONCAT3(x,y,z) x ## y ## z
#define CONCAT3(x,y,z) XCONCAT3(x,y,z)
#define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
#define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
#define rxdesc ___CONCAT(rxd,RX_MODE)

/*
 * Derived ring geometry:
 *	NEXTTX    index following x in the transmit ring (wraps at NTXDESCS)
 *	NRXPAGES  number of receive descriptor pages
 *	NRXREAL   number of receive descriptors actually used
 *		  (whole pages, so it may exceed the requested NRXDESCS)
 *	RXMAPSZ   bytes of memory covering all receive descriptor pages
 */
#define NEXTTX(x)	(((x)+1) % NTXDESCS)
#define NRXFRAGS	RX_MODE /* hardware imposed frags */
#define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
#define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
#define RXMAPSZ		(NRXPAGES*PAGE_SIZE)

/* Bump an event counter; expands to nothing without XGE_EVENT_COUNTERS. */
#ifdef XGE_EVENT_COUNTERS
#define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
#else
#define XGE_EVCNT_INCR(ev)	/* nothing */
#endif
120 
/*
 * Magics to fix a bug when the mac address can't be read correctly.
 * Comes from the Linux driver.
 *
 * The values are written one after another to GPIO_CONTROL by
 * xge_attach().  The table is never modified, so it is declared const.
 */
static const uint64_t fix_mac[] = {
	0x0060000000000000ULL, 0x0060600000000000ULL,
	0x0040600000000000ULL, 0x0000600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0000600000000000ULL,
	0x0040600000000000ULL, 0x0060600000000000ULL,
};
141 
142 
/*
 * Per-device software state.  One instance exists for each attached
 * adapter; it is obtained with device_private() in xge_attach() and
 * reached from the network stack through ifp->if_softc.
 */
struct xge_softc {
	struct device sc_dev;
	struct ethercom sc_ethercom;
#define sc_if sc_ethercom.ec_if
	bus_dma_tag_t sc_dmat;		/* DMA tag from the PCI attach args */
	bus_space_tag_t sc_st;		/* PIF register space (XGE_PIF_BAR) */
	bus_space_handle_t sc_sh;
	bus_space_tag_t sc_txt;		/* TXP register space (XGE_TXP_BAR) */
	bus_space_handle_t sc_txh;
	void *sc_ih;			/* handle from pci_intr_establish() */

	struct ifmedia xena_media;
	pcireg_t sc_pciregs[16];	/* first 64 bytes of PCI config space,
					   saved across the chip reset */

	/* Transmit structures */
	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
	int sc_nexttx, sc_lasttx;	/* next slot xge_start() fills /
					   last slot xge_intr() reclaimed */
	bus_dmamap_t sc_txmap;		/* transmit descriptor map */

	/* Receive data */
	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
	int sc_nextrx;			/* next descriptor to check */

#ifdef XGE_EVENT_COUNTERS
	struct evcnt sc_intr;	/* # of interrupts */
	struct evcnt sc_txintr;	/* # of transmit interrupts */
	struct evcnt sc_rxintr;	/* # of receive interrupts */
	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
#endif
};
179 
/*
 * Forward declarations.  All of these are file-local; the definitions
 * further down inherit internal linkage from these prototypes even
 * where they do not repeat the `static' keyword.
 */
static int xge_match(device_t parent, cfdata_t cf, void *aux);
static void xge_attach(device_t parent, device_t self, void *aux);
static int xge_alloc_txmem(struct xge_softc *);
static int xge_alloc_rxmem(struct xge_softc *);
static void xge_start(struct ifnet *);
static void xge_stop(struct ifnet *, int);
static int xge_add_rxbuf(struct xge_softc *, int);
static void xge_mcast_filter(struct xge_softc *sc);
static int xge_setup_xgxs(struct xge_softc *sc);
static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
static int xge_init(struct ifnet *ifp);
static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
static int xge_xgmii_mediachange(struct ifnet *);
static int xge_intr(void  *);
194 
/*
 * Helpers to address registers.
 *
 * These are shorthands for the inline functions below.  They implicitly
 * use a variable named `sc' (struct xge_softc *), which therefore must
 * be in scope wherever they are expanded.
 */
#define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
#define PIF_RCSR(csr)		pif_rcsr(sc, csr)
#define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
#define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
202 
203 static inline void
204 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
205 {
206 	uint32_t lval, hval;
207 
208 	lval = val&0xffffffff;
209 	hval = val>>32;
210 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
211 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
212 }
213 
214 static inline uint64_t
215 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
216 {
217 	uint64_t val, val2;
218 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
219 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
220 	val |= (val2 << 32);
221 	return val;
222 }
223 
224 static inline void
225 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
226 {
227 	uint32_t lval, hval;
228 
229 	lval = val&0xffffffff;
230 	hval = val>>32;
231 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
232 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
233 }
234 
235 
236 static inline void
237 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
238 {
239 	uint32_t lval, hval;
240 
241 	lval = val&0xffffffff;
242 	hval = val>>32;
243 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
244 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
245 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
246 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
247 }
248 
249 
/*
 * Autoconfiguration glue: match and attach routines are provided,
 * the remaining two hooks are left NULL.
 */
CFATTACH_DECL(xge, sizeof(struct xge_softc),
    xge_match, xge_attach, NULL, NULL);

/* Device name for messages; needs a variable `sc' in scope. */
#define XNAME device_xname(&sc->sc_dev)
254 
/*
 * Receive descriptor helpers.  Both need a variable `sc' in scope.
 *
 * Descriptor number `desc' lives in page (desc / NDESC_BUFMODE) at
 * slot (desc % NDESC_BUFMODE).
 *
 *	XGE_RXSYNC(desc, what)	bus_dmamap_sync() exactly that descriptor
 *				within the receive descriptor map.
 *	XGE_RXD(desc)		pointer to that descriptor.
 *
 * The arguments and the XGE_RXD expansion are parenthesized so that
 * any expression can be passed without precedence surprises.
 */
#define XGE_RXSYNC(desc, what) \
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
	((desc)/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
	((desc)%NDESC_BUFMODE), sizeof(struct rxdesc), (what))
#define XGE_RXD(desc)	(&sc->sc_rxd_4k[(desc)/NDESC_BUFMODE]-> \
	r4_rxd[(desc)%NDESC_BUFMODE])
261 
/*
 * Non-tunable constants.
 *
 * XGE_MAX_MTU is the largest MTU accepted by SIOCSIFMTU and the size of
 * each receive buffer DMA map; XGE_IP_MAXPACKET is the size of each
 * transmit DMA map.
 */
#define XGE_MAX_MTU		9600
#define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
267 
268 static int
269 xge_match(device_t parent, cfdata_t cf, void *aux)
270 {
271 	struct pci_attach_args *pa = aux;
272 
273 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
274 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
275 		return (1);
276 
277 	return (0);
278 }
279 
280 void
281 xge_attach(device_t parent, device_t self, void *aux)
282 {
283 	struct pci_attach_args *pa = aux;
284 	struct xge_softc *sc;
285 	struct ifnet *ifp;
286 	pcireg_t memtype;
287 	pci_intr_handle_t ih;
288 	const char *intrstr = NULL;
289 	pci_chipset_tag_t pc = pa->pa_pc;
290 	uint8_t enaddr[ETHER_ADDR_LEN];
291 	uint64_t val;
292 	int i;
293 
294 	sc = device_private(self);
295 
296 	sc->sc_dmat = pa->pa_dmat;
297 
298 	/* Get BAR0 address */
299 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
300 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
301 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
302 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
303 		return;
304 	}
305 
306 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
307 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
308 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
309 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
310 		return;
311 	}
312 
313 	/* Save PCI config space */
314 	for (i = 0; i < 64; i += 4)
315 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
316 
317 #if BYTE_ORDER == LITTLE_ENDIAN
318 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
319 	val &= ~(TxF_R_SE|RxF_W_SE);
320 	PIF_WCSR(SWAPPER_CTRL, val);
321 	PIF_WCSR(SWAPPER_CTRL, val);
322 #elif BYTE_ORDER == BIG_ENDIAN
323 	/* do nothing */
324 #else
325 #error bad endianness!
326 #endif
327 
328 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
329 		return printf("%s: failed configuring endian, %llx != %llx!\n",
330 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
331 
332 	/*
333 	 * The MAC addr may be all FF's, which is not good.
334 	 * Resolve it by writing some magics to GPIO_CONTROL and
335 	 * force a chip reset to read in the serial eeprom again.
336 	 */
337 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
338 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
339 		PIF_RCSR(GPIO_CONTROL);
340 	}
341 
342 	/*
343 	 * Reset the chip and restore the PCI registers.
344 	 */
345 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
346 	DELAY(500000);
347 	for (i = 0; i < 64; i += 4)
348 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
349 
350 	/*
351 	 * Restore the byte order registers.
352 	 */
353 #if BYTE_ORDER == LITTLE_ENDIAN
354 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
355 	val &= ~(TxF_R_SE|RxF_W_SE);
356 	PIF_WCSR(SWAPPER_CTRL, val);
357 	PIF_WCSR(SWAPPER_CTRL, val);
358 #elif BYTE_ORDER == BIG_ENDIAN
359 	/* do nothing */
360 #else
361 #error bad endianness!
362 #endif
363 
364 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC)
365 		return printf("%s: failed configuring endian2, %llx != %llx!\n",
366 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
367 
368 	/*
369 	 * XGXS initialization.
370 	 */
371 	/* 29, reset */
372 	PIF_WCSR(SW_RESET, 0);
373 	DELAY(500000);
374 
375 	/* 30, configure XGXS transceiver */
376 	xge_setup_xgxs(sc);
377 
378 	/* 33, program MAC address (not needed here) */
379 	/* Get ethernet address */
380 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
381 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
382 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
383 		;
384 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
385 	for (i = 0; i < ETHER_ADDR_LEN; i++)
386 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
387 
388 	/*
389 	 * Get memory for transmit descriptor lists.
390 	 */
391 	if (xge_alloc_txmem(sc))
392 		return printf("%s: failed allocating txmem.\n", XNAME);
393 
394 	/* 9 and 10 - set FIFO number/prio */
395 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
396 	PIF_WCSR(TX_FIFO_P1, 0ULL);
397 	PIF_WCSR(TX_FIFO_P2, 0ULL);
398 	PIF_WCSR(TX_FIFO_P3, 0ULL);
399 
400 	/* 11, XXX set round-robin prio? */
401 
402 	/* 12, enable transmit FIFO */
403 	val = PIF_RCSR(TX_FIFO_P0);
404 	val |= TX_FIFO_ENABLE;
405 	PIF_WCSR(TX_FIFO_P0, val);
406 
407 	/* 13, disable some error checks */
408 	PIF_WCSR(TX_PA_CFG,
409 	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);
410 
411 	/*
412 	 * Create transmit DMA maps.
413 	 * Make them large for TSO.
414 	 */
415 	for (i = 0; i < NTXDESCS; i++) {
416 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
417 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i]))
418 			return printf("%s: cannot create TX DMA maps\n", XNAME);
419 	}
420 
421 	sc->sc_lasttx = NTXDESCS-1;
422 
423 	/*
424 	 * RxDMA initialization.
425 	 * Only use one out of 8 possible receive queues.
426 	 */
427 	if (xge_alloc_rxmem(sc))	/* allocate rx descriptor memory */
428 		return printf("%s: failed allocating rxmem\n", XNAME);
429 
430 	/* Create receive buffer DMA maps */
431 	for (i = 0; i < NRXREAL; i++) {
432 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
433 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i]))
434 			return printf("%s: cannot create RX DMA maps\n", XNAME);
435 	}
436 
437 	/* allocate mbufs to receive descriptors */
438 	for (i = 0; i < NRXREAL; i++)
439 		if (xge_add_rxbuf(sc, i))
440 			panic("out of mbufs too early");
441 
442 	/* 14, setup receive ring priority */
443 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
444 
445 	/* 15, setup receive ring round-robin calendar */
446 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
447 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
448 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
449 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
450 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
451 
452 	/* 16, write receive ring start address */
453 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
454 	/* PRC_RXD0_[1-7] are not used */
455 
456 	/* 17, Setup alarm registers */
457 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
458 
459 	/* 18, init receive ring controller */
460 #if RX_MODE == RX_MODE_1
461 	val = RING_MODE_1;
462 #elif RX_MODE == RX_MODE_3
463 	val = RING_MODE_3;
464 #else /* RX_MODE == RX_MODE_5 */
465 	val = RING_MODE_5;
466 #endif
467 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
468 	/* leave 1-7 disabled */
469 	/* XXXX snoop configuration? */
470 
471 	/* 19, set chip memory assigned to the queue */
472 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
473 
474 	/* 20, setup RLDRAM parameters */
475 	/* do not touch it for now */
476 
477 	/* 21, setup pause frame thresholds */
478 	/* so not touch the defaults */
479 	/* XXX - must 0xff be written as stated in the manual? */
480 
481 	/* 22, configure RED */
482 	/* we do not want to drop packets, so ignore */
483 
484 	/* 23, initiate RLDRAM */
485 	val = PIF_RCSR(MC_RLDRAM_MRS);
486 	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
487 	PIF_WCSR(MC_RLDRAM_MRS, val);
488 	DELAY(1000);
489 
490 	/*
491 	 * Setup interrupt policies.
492 	 */
493 	/* 40, Transmit interrupts */
494 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
495 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
496 	PIF_WCSR(TTI_DATA2_MEM,
497 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
498 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
499 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
500 		;
501 
502 	/* 41, Receive interrupts */
503 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
504 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
505 	PIF_WCSR(RTI_DATA2_MEM,
506 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
507 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
508 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
509 		;
510 
511 	/*
512 	 * Setup media stuff.
513 	 */
514 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
515 	    xge_ifmedia_status);
516 	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
517 	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);
518 
519 	aprint_normal("%s: Ethernet address %s\n", XNAME,
520 	    ether_sprintf(enaddr));
521 
522 	ifp = &sc->sc_ethercom.ec_if;
523 	strlcpy(ifp->if_xname, device_xname(&sc->sc_dev), IFNAMSIZ);
524 	ifp->if_baudrate = 10000000000LL;
525 	ifp->if_init = xge_init;
526 	ifp->if_stop = xge_stop;
527 	ifp->if_softc = sc;
528 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
529 	ifp->if_ioctl = xge_ioctl;
530 	ifp->if_start = xge_start;
531 	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
532 	IFQ_SET_READY(&ifp->if_snd);
533 
534 	/*
535 	 * Offloading capabilities.
536 	 */
537 	sc->sc_ethercom.ec_capabilities |=
538 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
539 	ifp->if_capabilities |=
540 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
541 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
542 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
543 
544 	/*
545 	 * Attach the interface.
546 	 */
547 	if_attach(ifp);
548 	ether_ifattach(ifp, enaddr);
549 
550 	/*
551 	 * Setup interrupt vector before initializing.
552 	 */
553 	if (pci_intr_map(pa, &ih))
554 		return aprint_error_dev(&sc->sc_dev, "unable to map interrupt\n");
555 	intrstr = pci_intr_string(pc, ih);
556 	if ((sc->sc_ih =
557 	    pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL)
558 		return aprint_error_dev(&sc->sc_dev, "unable to establish interrupt at %s\n",
559 		    intrstr ? intrstr : "<unknown>");
560 	aprint_normal_dev(&sc->sc_dev, "interrupting at %s\n", intrstr);
561 
562 #ifdef XGE_EVENT_COUNTERS
563 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
564 	    NULL, XNAME, "intr");
565 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
566 	    NULL, XNAME, "txintr");
567 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
568 	    NULL, XNAME, "rxintr");
569 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
570 	    NULL, XNAME, "txqe");
571 #endif
572 }
573 
574 void
575 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
576 {
577 	struct xge_softc *sc = ifp->if_softc;
578 	uint64_t reg;
579 
580 	ifmr->ifm_status = IFM_AVALID;
581 	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;
582 
583 	reg = PIF_RCSR(ADAPTER_STATUS);
584 	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
585 		ifmr->ifm_status |= IFM_ACTIVE;
586 }
587 
/*
 * ifmedia change callback.  Performs no action and always returns 0.
 */
int
xge_xgmii_mediachange(struct ifnet *ifp)
{
	(void)ifp;	/* unused */

	return (0);
}
593 
594 static void
595 xge_enable(struct xge_softc *sc)
596 {
597 	uint64_t val;
598 
599 	/* 2, enable adapter */
600 	val = PIF_RCSR(ADAPTER_CONTROL);
601 	val |= ADAPTER_EN;
602 	PIF_WCSR(ADAPTER_CONTROL, val);
603 
604 	/* 3, light the card enable led */
605 	val = PIF_RCSR(ADAPTER_CONTROL);
606 	val |= LED_ON;
607 	PIF_WCSR(ADAPTER_CONTROL, val);
608 	printf("%s: link up\n", XNAME);
609 
610 }
611 
612 int
613 xge_init(struct ifnet *ifp)
614 {
615 	struct xge_softc *sc = ifp->if_softc;
616 	uint64_t val;
617 
618 	if (ifp->if_flags & IFF_RUNNING)
619 		return 0;
620 
621 	/* 31+32, setup MAC config */
622 	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
623 	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);
624 
625 	DELAY(1000);
626 
627 	/* 54, ensure that the adapter is 'quiescent' */
628 	val = PIF_RCSR(ADAPTER_STATUS);
629 	if ((val & QUIESCENT) != QUIESCENT) {
630 		char buf[200];
631 		printf("%s: adapter not quiescent, aborting\n", XNAME);
632 		val = (val & QUIESCENT) ^ QUIESCENT;
633 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
634 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
635 		return 1;
636 	}
637 
638 	/* 56, enable the transmit laser */
639 	val = PIF_RCSR(ADAPTER_CONTROL);
640 	val |= EOI_TX_ON;
641 	PIF_WCSR(ADAPTER_CONTROL, val);
642 
643 	xge_enable(sc);
644 	/*
645 	 * Enable all interrupts
646 	 */
647 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
648 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
649 	PIF_WCSR(GENERAL_INT_MASK, 0);
650 	PIF_WCSR(TXPIC_INT_MASK, 0);
651 	PIF_WCSR(RXPIC_INT_MASK, 0);
652 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
653 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
654 
655 
656 	/* Done... */
657 	ifp->if_flags |= IFF_RUNNING;
658 	ifp->if_flags &= ~IFF_OACTIVE;
659 
660 	return 0;
661 }
662 
663 static void
664 xge_stop(struct ifnet *ifp, int disable)
665 {
666 	struct xge_softc *sc = ifp->if_softc;
667 	uint64_t val;
668 
669 	val = PIF_RCSR(ADAPTER_CONTROL);
670 	val &= ~ADAPTER_EN;
671 	PIF_WCSR(ADAPTER_CONTROL, val);
672 
673 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
674 		;
675 }
676 
/*
 * Interrupt handler; `pv' is the softc registered with
 * pci_intr_establish().
 *
 * In order: acknowledges GENERAL_INT_STATUS, handles a link state
 * change, reclaims transmit slots the chip has finished with, calls
 * xge_start() to queue more packets, and finally passes received
 * packets to the stack while refilling the receive ring.
 *
 * Returns 0 both when no interrupt was pending and after servicing one.
 */
int
xge_intr(void *pv)
{
	struct xge_softc *sc = pv;
	struct txd *txd;
	struct ifnet *ifp = &sc->sc_if;
	bus_dmamap_t dmp;
	uint64_t val;
	int i, lasttx, plen;

	val = PIF_RCSR(GENERAL_INT_STATUS);
	if (val == 0)
		return 0; /* no interrupt here */

	XGE_EVCNT_INCR(&sc->sc_intr);

	/* Write the pending bits back to the status register. */
	PIF_WCSR(GENERAL_INT_STATUS, val);

	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
		/* Wait for quiescence */
		printf("%s: link down\n", XNAME);
		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
			;
		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);

		val = PIF_RCSR(ADAPTER_STATUS);
		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
			xge_enable(sc); /* Only if link restored */
	}

	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_txintr);
		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
	}
	/*
	 * Collect sent packets.
	 * Walk the slots after sc_lasttx up to (not including) sc_nexttx
	 * and stop at the first one whose first descriptor still has
	 * TXD_CTL1_OWN set (the bit xge_start() sets when queueing).
	 */
	lasttx = sc->sc_lasttx;
	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
		txd = sc->sc_txd[i];
		dmp = sc->sc_txm[i];

		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
		    dmp->dm_mapsize,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		if (txd->txd_control1 & TXD_CTL1_OWN) {
			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
			break;
		}
		/* Slot is done: release its DMA mapping and mbuf. */
		bus_dmamap_unload(sc->sc_dmat, dmp);
		m_freem(sc->sc_txb[i]);
		ifp->if_opackets++;
		sc->sc_lasttx = i;
	}
	/* The loop ran to the end: every queued slot has been reclaimed. */
	if (i == sc->sc_nexttx) {
		XGE_EVCNT_INCR(&sc->sc_txqe);
	}

	/* At least one slot was freed, so transmission may continue. */
	if (sc->sc_lasttx != lasttx)
		ifp->if_flags &= ~IFF_OACTIVE;

	xge_start(ifp); /* Try to get more packets on the wire */

	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_rxintr);
		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
	}

	/*
	 * Receive: process descriptors starting at sc_nextrx until one
	 * with RXD_CTL1_OWN set is found.
	 */
	for (;;) {
		struct rxdesc *rxd;
		struct mbuf *m;

		XGE_RXSYNC(sc->sc_nextrx,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		rxd = XGE_RXD(sc->sc_nextrx);
		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
			break;
		}

		/* got a packet */
		m = sc->sc_rxb[sc->sc_nextrx];
#if RX_MODE == RX_MODE_1
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
#elif RX_MODE == RX_MODE_3
#error Fix rxmodes in xge_intr
#elif RX_MODE == RX_MODE_5
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_len =
		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
		plen += m->m_next->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
#endif
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.len = plen;

		/*
		 * Save the status word before refilling the slot.
		 * NOTE(review): presumably xge_add_rxbuf() rewrites the
		 * descriptor -- confirm against its definition.
		 */
		val = rxd->rxd_control1;

		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
			/* Failed, recycle this mbuf */
#if RX_MODE == RX_MODE_1
			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
			rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
#endif
			XGE_RXSYNC(sc->sc_nextrx,
			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
			/* The packet is dropped and receive processing stops. */
			ifp->if_ierrors++;
			break;
		}

		ifp->if_ipackets++;

		/*
		 * Translate the checksum results in the saved status word
		 * into mbuf checksum flags; a checksum field other than
		 * 0xffff marks the corresponding checksum as bad.
		 */
		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
			if (RXD_CTL1_L3CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}

		bpf_mtap(ifp, m);

		(*ifp->if_input)(ifp, m);

		/* Advance to the next descriptor, wrapping at the ring end. */
		if (++sc->sc_nextrx == NRXREAL)
			sc->sc_nextrx = 0;

	}

	return 0;
}
824 
825 int
826 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
827 {
828 	struct xge_softc *sc = ifp->if_softc;
829 	struct ifreq *ifr = (struct ifreq *) data;
830 	int s, error = 0;
831 
832 	s = splnet();
833 
834 	switch (cmd) {
835 	case SIOCSIFMTU:
836 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
837 			error = EINVAL;
838 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET){
839 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
840 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
841 			error = 0;
842 		}
843 		break;
844 
845 	case SIOCGIFMEDIA:
846 	case SIOCSIFMEDIA:
847 		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
848 		break;
849 
850 	default:
851 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
852 			break;
853 
854 		error = 0;
855 
856 		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
857 			;
858 		else if (ifp->if_flags & IFF_RUNNING) {
859 			/* Change multicast list */
860 			xge_mcast_filter(sc);
861 		}
862 		break;
863 	}
864 
865 	splx(s);
866 	return(error);
867 }
868 
869 void
870 xge_mcast_filter(struct xge_softc *sc)
871 {
872 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
873 	struct ethercom *ec = &sc->sc_ethercom;
874 	struct ether_multi *enm;
875 	struct ether_multistep step;
876 	int i, numaddr = 1; /* first slot used for card unicast address */
877 	uint64_t val;
878 
879 	ETHER_FIRST_MULTI(step, ec, enm);
880 	while (enm != NULL) {
881 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
882 			/* Skip ranges */
883 			goto allmulti;
884 		}
885 		if (numaddr == MAX_MCAST_ADDR)
886 			goto allmulti;
887 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
888 			val <<= 8;
889 			val |= enm->enm_addrlo[i];
890 		}
891 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
892 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
893 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
894 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
895 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
896 			;
897 		numaddr++;
898 		ETHER_NEXT_MULTI(step, enm);
899 	}
900 	/* set the remaining entries to the broadcast address */
901 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
902 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
903 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
904 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
905 		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
906 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
907 			;
908 	}
909 	ifp->if_flags &= ~IFF_ALLMULTI;
910 	return;
911 
912 allmulti:
913 	/* Just receive everything with the multicast bit set */
914 	ifp->if_flags |= IFF_ALLMULTI;
915 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
916 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
917 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
918 	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
919 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
920 		;
921 }
922 
/*
 * if_start handler: move packets from the interface send queue onto
 * the transmit ring.
 *
 * Returns immediately unless the interface is running and not marked
 * output-active.  For each packet a transmit slot is filled with one
 * descriptor per DMA segment and the list is handed to the chip through
 * the TXDL_PAR/TXDL_LCR registers.  The loop ends when the queue is
 * empty, the ring is full, or a DMA load fails.
 */
void
xge_start(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	struct txd *txd = NULL; /* XXX - gcc */
	bus_dmamap_t dmp;
	struct	mbuf *m;
	uint64_t par, lcr;
	int nexttx = 0, ntxd, error, i;

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	par = lcr = 0;
	for (;;) {
		/* Peek only; the packet is dequeued once it is mapped. */
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;	/* out of packets */

		if (sc->sc_nexttx == sc->sc_lasttx)
			break;	/* No more space */

		nexttx = sc->sc_nexttx;
		dmp = sc->sc_txm[nexttx];

		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
			printf("%s: bus_dmamap_load_mbuf error %d\n",
			    XNAME, error);
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		txd = sc->sc_txd[nexttx];
		sc->sc_txb[nexttx] = m;	/* freed by xge_intr() when sent */
		/* One descriptor per non-empty DMA segment. */
		for (i = 0; i < dmp->dm_nsegs; i++) {
			if (dmp->dm_segs[i].ds_len == 0)
				continue;
			txd->txd_control1 = dmp->dm_segs[i].ds_len;
			txd->txd_control2 = 0;
			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
			txd++;
		}
		/* Index of the last descriptor used (count minus one). */
		ntxd = txd - sc->sc_txd[nexttx] - 1;
		txd = sc->sc_txd[nexttx];
		/* Control bits go on the first descriptor of the list ... */
		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
		txd->txd_control2 = TXD_CTL2_UTIL;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
			txd->txd_control1 |= TXD_CTL1_LSO;
		}

		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
			txd->txd_control2 |= TXD_CTL2_CIPv4;
		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
			txd->txd_control2 |= TXD_CTL2_CTCP;
		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
			txd->txd_control2 |= TXD_CTL2_CUDP;
		/* ... except GCL, which goes on the last one. */
		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

		/*
		 * Hand the list to the chip: its bus address goes to
		 * TXDL_PAR, the descriptor count and flags to TXDL_LCR.
		 */
		par = sc->sc_txdp[nexttx];
		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
			lcr |= TXDL_SFF;
		TXP_WCSR(TXDL_PAR, par);
		TXP_WCSR(TXDL_LCR, lcr);

		bpf_mtap(ifp, m);

		sc->sc_nexttx = NEXTTX(nexttx);
	}
}
1001 
1002 /*
1003  * Allocate DMA memory for transmit descriptor fragments.
1004  * Only one map is used for all descriptors.
1005  */
1006 int
1007 xge_alloc_txmem(struct xge_softc *sc)
1008 {
1009 	struct txd *txp;
1010 	bus_dma_segment_t seg;
1011 	bus_addr_t txdp;
1012 	void *kva;
1013 	int i, rseg, state;
1014 
1015 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
1016 	state = 0;
1017 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
1018 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1019 		goto err;
1020 	state++;
1021 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
1022 	    BUS_DMA_NOWAIT))
1023 		goto err;
1024 
1025 	state++;
1026 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
1027 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
1028 		goto err;
1029 	state++;
1030 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
1031 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
1032 		goto err;
1033 
1034 	/* setup transmit array pointers */
1035 	txp = (struct txd *)kva;
1036 	txdp = seg.ds_addr;
1037 	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
1038 		sc->sc_txd[i] = txp;
1039 		sc->sc_txdp[i] = txdp;
1040 		txp += NTXFRAGS;
1041 		txdp += (NTXFRAGS * sizeof(struct txd));
1042 	}
1043 
1044 	return 0;
1045 
1046 err:
1047 	if (state > 2)
1048 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1049 	if (state > 1)
1050 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1051 	if (state > 0)
1052 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1053 	return ENOBUFS;
1054 }
1055 
1056 /*
1057  * Allocate DMA memory for receive descriptor,
1058  * only one map is used for all descriptors.
1059  * link receive descriptor pages together.
1060  */
1061 int
1062 xge_alloc_rxmem(struct xge_softc *sc)
1063 {
1064 	struct rxd_4k *rxpp;
1065 	bus_dma_segment_t seg;
1066 	void *kva;
1067 	int i, rseg, state;
1068 
1069 	/* sanity check */
1070 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
1071 		printf("bad compiler struct alignment, %d != %d\n",
1072 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
1073 		return EINVAL;
1074 	}
1075 
1076 	state = 0;
1077 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
1078 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1079 		goto err;
1080 	state++;
1081 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
1082 	    BUS_DMA_NOWAIT))
1083 		goto err;
1084 
1085 	state++;
1086 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
1087 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
1088 		goto err;
1089 	state++;
1090 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
1091 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
1092 		goto err;
1093 
1094 	/* setup receive page link pointers */
1095 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
1096 		sc->sc_rxd_4k[i] = rxpp;
1097 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
1098 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
1099 	}
1100 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
1101 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
1102 
1103 	return 0;
1104 
1105 err:
1106 	if (state > 2)
1107 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1108 	if (state > 1)
1109 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1110 	if (state > 0)
1111 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1112 	return ENOBUFS;
1113 }
1114 
1115 
1116 /*
1117  * Add a new mbuf chain to descriptor id.
1118  */
1119 int
1120 xge_add_rxbuf(struct xge_softc *sc, int id)
1121 {
1122 	struct rxdesc *rxd;
1123 	struct mbuf *m[5];
1124 	int page, desc, error;
1125 #if RX_MODE == RX_MODE_5
1126 	int i;
1127 #endif
1128 
1129 	page = id/NDESC_BUFMODE;
1130 	desc = id%NDESC_BUFMODE;
1131 
1132 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
1133 
1134 	/*
1135 	 * Allocate mbufs.
1136 	 * Currently five mbufs and two clusters are used,
1137 	 * the hardware will put (ethernet, ip, tcp/udp) headers in
1138 	 * their own buffer and the clusters are only used for data.
1139 	 */
1140 #if RX_MODE == RX_MODE_1
1141 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1142 	if (m[0] == NULL)
1143 		return ENOBUFS;
1144 	MCLGET(m[0], M_DONTWAIT);
1145 	if ((m[0]->m_flags & M_EXT) == 0) {
1146 		m_freem(m[0]);
1147 		return ENOBUFS;
1148 	}
1149 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
1150 #elif RX_MODE == RX_MODE_3
1151 #error missing rxmode 3.
1152 #elif RX_MODE == RX_MODE_5
1153 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1154 	for (i = 1; i < 5; i++) {
1155 		MGET(m[i], M_DONTWAIT, MT_DATA);
1156 	}
1157 	if (m[3])
1158 		MCLGET(m[3], M_DONTWAIT);
1159 	if (m[4])
1160 		MCLGET(m[4], M_DONTWAIT);
1161 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
1162 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
1163 		/* Out of something */
1164 		for (i = 0; i < 5; i++)
1165 			if (m[i] != NULL)
1166 				m_free(m[i]);
1167 		return ENOBUFS;
1168 	}
1169 	/* Link'em together */
1170 	m[0]->m_next = m[1];
1171 	m[1]->m_next = m[2];
1172 	m[2]->m_next = m[3];
1173 	m[3]->m_next = m[4];
1174 #else
1175 #error bad mode RX_MODE
1176 #endif
1177 
1178 	if (sc->sc_rxb[id])
1179 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
1180 	sc->sc_rxb[id] = m[0];
1181 
1182 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
1183 	    BUS_DMA_READ|BUS_DMA_NOWAIT);
1184 	if (error)
1185 		return error;
1186 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
1187 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
1188 
1189 #if RX_MODE == RX_MODE_1
1190 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
1191 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1192 	rxd->rxd_control1 = RXD_CTL1_OWN;
1193 #elif RX_MODE == RX_MODE_3
1194 #elif RX_MODE == RX_MODE_5
1195 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
1196 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
1197 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1198 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
1199 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
1200 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
1201 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
1202 	rxd->rxd_control1 = RXD_CTL1_OWN;
1203 #endif
1204 
1205 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1206 	return 0;
1207 }
1208 
1209 /*
1210  * These magics comes from the FreeBSD driver.
1211  */
1212 int
1213 xge_setup_xgxs(struct xge_softc *sc)
1214 {
1215 	/* The magic numbers are described in the users guide */
1216 
1217 	/* Writing to MDIO 0x8000 (Global Config 0) */
1218 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1219 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
1220 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
1221 
1222 	/* Writing to MDIO 0x8000 (Global Config 1) */
1223 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1224 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1225 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
1226 
1227 	/* Reset the Gigablaze */
1228 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1229 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
1230 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
1231 
1232 	/* read the pole settings */
1233 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1234 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
1235 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
1236 
1237 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1238 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1239 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
1240 
1241 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1242 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
1243 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
1244 
1245 	/* Workaround for TX Lane XAUI initialization error.
1246 	   Read Xpak PHY register 24 for XAUI lane status */
1247 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
1248 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1249 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
1250 
1251 	/*
1252 	 * Reading the MDIO control with value 0x1804001c0F001c
1253 	 * means the TxLanes were already in sync
1254 	 * Reading the MDIO control with value 0x1804000c0x001c
1255 	 * means some TxLanes are not in sync where x is a 4-bit
1256 	 * value representing each lanes
1257 	 */
1258 #if 0
1259 	val = PIF_RCSR(MDIO_CONTROL);
1260 	if (val != 0x1804001c0F001cULL) {
1261 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
1262 		    XNAME, val, 0x1804001c0F001cULL);
1263 		return 1;
1264 	}
1265 #endif
1266 
1267 	/* Set and remove the DTE XS INTLoopBackN */
1268 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
1269 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
1270 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
1271 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
1272 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
1273 
1274 #if 0
1275 	/* Reading the DTX control register Should be 0x5152040001c */
1276 	val = PIF_RCSR(DTX_CONTROL);
1277 	if (val != 0x5152040001cULL) {
1278 		printf("%s: DTX_CONTROL: %llx != %llx\n",
1279 		    XNAME, val, 0x5152040001cULL);
1280 		return 1;
1281 	}
1282 #endif
1283 
1284 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
1285 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1286 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
1287 
1288 #if 0
1289 	/* Reading the MIOD control should be 0x1804001c0f001c */
1290 	val = PIF_RCSR(MDIO_CONTROL);
1291 	if (val != 0x1804001c0f001cULL) {
1292 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
1293 		    XNAME, val, 0x1804001c0f001cULL);
1294 		return 1;
1295 	}
1296 #endif
1297 	return 0;
1298 }
1299