/*      $NetBSD: if_xge.c,v 1.25 2016/12/15 09:28:05 ozaki-r Exp $ */

/*
 * Copyright (c) 2004, SUNET, Swedish University Computer Network.
 * All rights reserved.
 *
 * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      SUNET, Swedish University Computer Network.
 * 4. The name of SUNET may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
 *
 * TODO (in no specific order):
 *	HW VLAN support.
 *	IPv6 HW cksum.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.25 2016/12/15 09:28:05 ozaki-r Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/device.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#include <sys/bus.h>
#include <sys/intr.h>
#include <machine/endian.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>

#include <sys/proc.h>

#include <dev/pci/if_xgereg.h>

/*
 * Some tunable constants, tune with care!
 */
#define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
#define NRXDESCS	1016	   /* # of receive descriptors (requested) */
#define NTXDESCS	8192	   /* Number of transmit descriptors */
#define NTXFRAGS	100	   /* Max fragments per packet */
#define XGE_EVENT_COUNTERS	   /* Instrumentation */

/*
 * Receive buffer modes; 1, 3 or 5 buffers.
 */
#define RX_MODE_1 1
#define RX_MODE_3 3
#define RX_MODE_5 5

/*
 * Use clever macros to avoid a bunch of #ifdef's.
 */
#define XCONCAT3(x,y,z) x ## y ## z
#define CONCAT3(x,y,z) XCONCAT3(x,y,z)
#define NDESC_BUFMODE CONCAT3(NDESC_,RX_MODE,BUFMODE)
#define rxd_4k CONCAT3(rxd,RX_MODE,_4k)
#define rxdesc ___CONCAT(rxd,RX_MODE)
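
/*
 * With RX_MODE == RX_MODE_1, for example, these expand to
 * NDESC_1BUFMODE, rxd1_4k and rxd1, so the rest of the driver can
 * use a single set of names regardless of the configured mode.
 */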

#define NEXTTX(x)	(((x)+1) % NTXDESCS)
#define NRXFRAGS	RX_MODE /* hardware imposed frags */
#define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
#define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
#define RXMAPSZ		(NRXPAGES*PAGE_SIZE)

#ifdef XGE_EVENT_COUNTERS
#define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
#else
#define XGE_EVCNT_INCR(ev)	/* nothing */
#endif

/*
 * Magic values to work around a bug where the MAC address cannot be
 * read correctly.  Taken from the Linux driver.
 */
static uint64_t fix_mac[] = {
	0x0060000000000000ULL, 0x0060600000000000ULL,
	0x0040600000000000ULL, 0x0000600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0060600000000000ULL,
	0x0020600000000000ULL, 0x0000600000000000ULL,
	0x0040600000000000ULL, 0x0060600000000000ULL,
};

struct xge_softc {
	device_t sc_dev;
	struct ethercom sc_ethercom;
#define sc_if sc_ethercom.ec_if
	bus_dma_tag_t sc_dmat;
	bus_space_tag_t sc_st;
	bus_space_handle_t sc_sh;
	bus_space_tag_t sc_txt;
	bus_space_handle_t sc_txh;
	void *sc_ih;

	struct ifmedia xena_media;
	pcireg_t sc_pciregs[16];

	/* Transmit structures */
	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
	int sc_nexttx, sc_lasttx;
	bus_dmamap_t sc_txmap;		/* transmit descriptor map */

	/* Receive data */
	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
	int sc_nextrx;			/* next descriptor to check */

#ifdef XGE_EVENT_COUNTERS
	struct evcnt sc_intr;	/* # of interrupts */
	struct evcnt sc_txintr;	/* # of transmit interrupts */
	struct evcnt sc_rxintr;	/* # of receive interrupts */
	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
#endif
};

static int xge_match(device_t parent, cfdata_t cf, void *aux);
static void xge_attach(device_t parent, device_t self, void *aux);
static int xge_alloc_txmem(struct xge_softc *);
static int xge_alloc_rxmem(struct xge_softc *);
static void xge_start(struct ifnet *);
static void xge_stop(struct ifnet *, int);
static int xge_add_rxbuf(struct xge_softc *, int);
static void xge_mcast_filter(struct xge_softc *sc);
static int xge_setup_xgxs(struct xge_softc *sc);
static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
static int xge_init(struct ifnet *ifp);
static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
static int xge_xgmii_mediachange(struct ifnet *);
static int xge_intr(void *);
/*
 * Helpers to address registers.
 */
#define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
#define PIF_RCSR(csr)		pif_rcsr(sc, csr)
#define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
#define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)

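/*
 * The Xframe CSRs are 64 bits wide but are accessed as two 32-bit
 * bus_space operations: the low word at the CSR offset, then the
 * high word at offset csr+4.
 */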
static inline void
pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}

static inline uint64_t
pif_rcsr(struct xge_softc *sc, bus_size_t csr)
{
	uint64_t val, val2;

	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
	val |= (val2 << 32);
	return val;
}

static inline void
txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
}

static inline void
pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
{
	uint32_t lval, hval;

	lval = val&0xffffffff;
	hval = val>>32;
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
}

CFATTACH_DECL_NEW(xge, sizeof(struct xge_softc),
    xge_match, xge_attach, NULL, NULL);

#define XNAME device_xname(sc->sc_dev)

#define XGE_RXSYNC(desc, what) \
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
#define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
	r4_rxd[desc%NDESC_BUFMODE]
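
/*
 * Receive descriptors are kept in 4k pages holding NDESC_BUFMODE
 * descriptors each, so descriptor "desc" lives in page
 * desc/NDESC_BUFMODE at slot desc%NDESC_BUFMODE; XGE_RXSYNC turns
 * that into a byte offset for bus_dmamap_sync() and XGE_RXD into a
 * pointer into the mapped page.
 */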

/*
 * Non-tunable constants.
 */
#define XGE_MAX_MTU		9600
#define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */

static int
xge_match(device_t parent, cfdata_t cf, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
		return (1);

	return (0);
}

void
xge_attach(device_t parent, device_t self, void *aux)
{
	struct pci_attach_args *pa = aux;
	struct xge_softc *sc;
	struct ifnet *ifp;
	pcireg_t memtype;
	pci_intr_handle_t ih;
	const char *intrstr = NULL;
	pci_chipset_tag_t pc = pa->pa_pc;
	uint8_t enaddr[ETHER_ADDR_LEN];
	uint64_t val;
	int i;
	char intrbuf[PCI_INTRSTR_LEN];

	sc = device_private(self);
	sc->sc_dev = self;
	sc->sc_dmat = pa->pa_dmat;

	/* Get BAR0 address */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
		return;
	}

	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
		return;
	}

	/* Save PCI config space */
	for (i = 0; i < 64; i += 4)
		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);

#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
		aprint_error("%s: failed configuring endian, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
		return;
	}

	/*
	 * The MAC address may read as all FF's, which is not good.
	 * Work around this by writing some magic values to GPIO_CONTROL,
	 * then force a chip reset so the serial EEPROM is read in again.
	 */
	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
		PIF_RCSR(GPIO_CONTROL);
	}

	/*
	 * Reset the chip and restore the PCI registers.
	 */
	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
	DELAY(500000);
	for (i = 0; i < 64; i += 4)
		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);

	/*
	 * Restore the byte order registers.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
	val &= ~(TxF_R_SE|RxF_W_SE);
	PIF_WCSR(SWAPPER_CTRL, val);
	PIF_WCSR(SWAPPER_CTRL, val);
#elif BYTE_ORDER == BIG_ENDIAN
	/* do nothing */
#else
#error bad endianness!
#endif

	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
		aprint_error("%s: failed configuring endian2, %llx != %llx!\n",
		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
		return;
	}

	/*
	 * XGXS initialization.
	 */
	/* 29, reset */
	PIF_WCSR(SW_RESET, 0);
	DELAY(500000);

	/* 30, configure XGXS transceiver */
	xge_setup_xgxs(sc);

	/* 33, program MAC address (not needed here) */
	/* Get ethernet address */
	PIF_WCSR(RMAC_ADDR_CMD_MEM,
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(0));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
	for (i = 0; i < ETHER_ADDR_LEN; i++)
		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));

	/*
	 * Get memory for transmit descriptor lists.
	 */
	if (xge_alloc_txmem(sc)) {
		aprint_error("%s: failed allocating txmem.\n", XNAME);
		return;
	}

	/* 9 and 10 - set FIFO number/prio */
	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
	PIF_WCSR(TX_FIFO_P1, 0ULL);
	PIF_WCSR(TX_FIFO_P2, 0ULL);
	PIF_WCSR(TX_FIFO_P3, 0ULL);

	/* 11, XXX set round-robin prio? */

	/* 12, enable transmit FIFO */
	val = PIF_RCSR(TX_FIFO_P0);
	val |= TX_FIFO_ENABLE;
	PIF_WCSR(TX_FIFO_P0, val);

	/* 13, disable some error checks */
	PIF_WCSR(TX_PA_CFG,
	    TX_PA_CFG_IFR|TX_PA_CFG_ISO|TX_PA_CFG_ILC|TX_PA_CFG_ILE);

	/*
	 * Create transmit DMA maps.
	 * Make them large for TSO.
	 */
	for (i = 0; i < NTXDESCS; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i])) {
			aprint_error("%s: cannot create TX DMA maps\n", XNAME);
			return;
		}
	}

	sc->sc_lasttx = NTXDESCS-1;

	/*
	 * RxDMA initialization.
	 * Only use one out of 8 possible receive queues.
	 */
	if (xge_alloc_rxmem(sc)) {	/* allocate rx descriptor memory */
		aprint_error("%s: failed allocating rxmem\n", XNAME);
		return;
	}

	/* Create receive buffer DMA maps */
	for (i = 0; i < NRXREAL; i++) {
		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i])) {
			aprint_error("%s: cannot create RX DMA maps\n", XNAME);
			return;
		}
	}

	/* allocate mbufs to receive descriptors */
	for (i = 0; i < NRXREAL; i++)
		if (xge_add_rxbuf(sc, i))
			panic("out of mbufs too early");

	/* 14, setup receive ring priority */
	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */

	/* 15, setup receive ring round-robin calendar */
	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);

	/* 16, write receive ring start address */
	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
	/* PRC_RXD0_[1-7] are not used */

	/* 17, Setup alarm registers */
	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */

	/* 18, init receive ring controller */
#if RX_MODE == RX_MODE_1
	val = RING_MODE_1;
#elif RX_MODE == RX_MODE_3
	val = RING_MODE_3;
#else /* RX_MODE == RX_MODE_5 */
	val = RING_MODE_5;
#endif
	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC|val);
	/* leave 1-7 disabled */
	/* XXXX snoop configuration? */

	/* 19, set chip memory assigned to the queue */
	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */

	/* 20, setup RLDRAM parameters */
	/* do not touch it for now */
	/* 21, setup pause frame thresholds */
	/* do not touch the defaults */
	/* XXX - must 0xff be written as stated in the manual? */

	/* 22, configure RED */
	/* we do not want to drop packets, so ignore */

	/* 23, initiate RLDRAM */
	val = PIF_RCSR(MC_RLDRAM_MRS);
	val |= MC_QUEUE_SIZE_ENABLE|MC_RLDRAM_MRS_ENABLE;
	PIF_WCSR(MC_RLDRAM_MRS, val);
	DELAY(1000);

	/*
	 * Setup interrupt policies.
	 */
	/* 40, Transmit interrupts */
	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
	PIF_WCSR(TTI_DATA2_MEM,
	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
		;

	/* 41, Receive interrupts */
	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
	PIF_WCSR(RTI_DATA2_MEM,
	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
		;

	/*
	 * Setup media stuff.
	 */
	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
	    xge_ifmedia_status);
	ifmedia_add(&sc->xena_media, IFM_ETHER|IFM_10G_LR, 0, NULL);
	ifmedia_set(&sc->xena_media, IFM_ETHER|IFM_10G_LR);

	aprint_normal("%s: Ethernet address %s\n", XNAME,
	    ether_sprintf(enaddr));

	ifp = &sc->sc_ethercom.ec_if;
	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_baudrate = 10000000000LL;
	ifp->if_init = xge_init;
	ifp->if_stop = xge_stop;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xge_ioctl;
	ifp->if_start = xge_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, max(NTXDESCS - 1, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Offloading capabilities.
	 */
	sc->sc_ethercom.ec_capabilities |=
	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
	ifp->if_capabilities |=
	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;

	/*
	 * Attach the interface.
	 */
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, enaddr);

	/*
	 * Setup interrupt vector before initializing.
	 */
	if (pci_intr_map(pa, &ih)) {
		aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
		return;
	}
	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
	if ((sc->sc_ih =
		pci_intr_establish(pc, ih, IPL_NET, xge_intr, sc)) == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish interrupt at %s\n",
		    intrstr ? intrstr : "<unknown>");
		return;
	}
	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);

#ifdef XGE_EVENT_COUNTERS
	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "intr");
	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txintr");
	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
	    NULL, XNAME, "rxintr");
	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
	    NULL, XNAME, "txqe");
#endif
}

void
xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t reg;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER|IFM_10G_LR;

	reg = PIF_RCSR(ADAPTER_STATUS);
	if ((reg & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
		ifmr->ifm_status |= IFM_ACTIVE;
}

int
xge_xgmii_mediachange(struct ifnet *ifp)
{
	return 0;
}

static void
xge_enable(struct xge_softc *sc)
{
	uint64_t val;

	/* 2, enable adapter */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	/* 3, light the card enable led */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= LED_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);
	printf("%s: link up\n", XNAME);
}

int
xge_init(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	if (ifp->if_flags & IFF_RUNNING)
		return 0;

	/* 31+32, setup MAC config */
	PIF_WKEY(MAC_CFG, TMAC_EN|RMAC_EN|TMAC_APPEND_PAD|RMAC_STRIP_FCS|
	    RMAC_BCAST_EN|RMAC_DISCARD_PFRM|RMAC_PROM_EN);

	DELAY(1000);

	/* 54, ensure that the adapter is 'quiescent' */
	val = PIF_RCSR(ADAPTER_STATUS);
	if ((val & QUIESCENT) != QUIESCENT) {
		char buf[200];
		printf("%s: adapter not quiescent, aborting\n", XNAME);
		val = (val & QUIESCENT) ^ QUIESCENT;
		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
		return 1;
	}

	/* 56, enable the transmit laser */
	val = PIF_RCSR(ADAPTER_CONTROL);
	val |= EOI_TX_ON;
	PIF_WCSR(ADAPTER_CONTROL, val);

	xge_enable(sc);

	/*
	 * Enable all interrupts
	 */
	PIF_WCSR(TX_TRAFFIC_MASK, 0);
	PIF_WCSR(RX_TRAFFIC_MASK, 0);
	PIF_WCSR(GENERAL_INT_MASK, 0);
	PIF_WCSR(TXPIC_INT_MASK, 0);
	PIF_WCSR(RXPIC_INT_MASK, 0);
	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);

	/* Done... */
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	return 0;
}

static void
xge_stop(struct ifnet *ifp, int disable)
{
	struct xge_softc *sc = ifp->if_softc;
	uint64_t val;

	val = PIF_RCSR(ADAPTER_CONTROL);
	val &= ~ADAPTER_EN;
	PIF_WCSR(ADAPTER_CONTROL, val);

	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
		;
}

int
xge_intr(void *pv)
{
	struct xge_softc *sc = pv;
	struct txd *txd;
	struct ifnet *ifp = &sc->sc_if;
	bus_dmamap_t dmp;
	uint64_t val;
	int i, lasttx, plen;

	val = PIF_RCSR(GENERAL_INT_STATUS);
	if (val == 0)
		return 0; /* no interrupt here */

	XGE_EVCNT_INCR(&sc->sc_intr);

	PIF_WCSR(GENERAL_INT_STATUS, val);

	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
		/* Wait for quiescence */
		printf("%s: link down\n", XNAME);
		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
			;
		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);

		val = PIF_RCSR(ADAPTER_STATUS);
		if ((val & (RMAC_REMOTE_FAULT|RMAC_LOCAL_FAULT)) == 0)
			xge_enable(sc); /* Only if link restored */
	}

	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_txintr);
		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
	}
	/*
	 * Collect sent packets.
	 */
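	/*
	 * The chip hands a descriptor back by clearing TXD_CTL1_OWN;
	 * walk forward from the last collected slot, unloading the DMA
	 * map and freeing the mbuf of every completed packet.
	 */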
	lasttx = sc->sc_lasttx;
	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
		txd = sc->sc_txd[i];
		dmp = sc->sc_txm[i];

		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
		    dmp->dm_mapsize,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		if (txd->txd_control1 & TXD_CTL1_OWN) {
			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
			break;
		}
		bus_dmamap_unload(sc->sc_dmat, dmp);
		m_freem(sc->sc_txb[i]);
		ifp->if_opackets++;
		sc->sc_lasttx = i;
	}
	if (i == sc->sc_nexttx) {
		XGE_EVCNT_INCR(&sc->sc_txqe);
	}

	if (sc->sc_lasttx != lasttx)
		ifp->if_flags &= ~IFF_OACTIVE;

	/* Try to get more packets on the wire */
	if_schedule_deferred_start(ifp);

	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
		XGE_EVCNT_INCR(&sc->sc_rxintr);
		PIF_WCSR(RX_TRAFFIC_INT, val); /* clear interrupt bits */
	}

	for (;;) {
		struct rxdesc *rxd;
		struct mbuf *m;

		XGE_RXSYNC(sc->sc_nextrx,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

		rxd = XGE_RXD(sc->sc_nextrx);
		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
			break;
		}

		/* got a packet */
		m = sc->sc_rxb[sc->sc_nextrx];
#if RX_MODE == RX_MODE_1
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
#elif RX_MODE == RX_MODE_3
#error Fix rxmodes in xge_intr
#elif RX_MODE == RX_MODE_5
		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_len =
		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
		plen += m->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
		plen += m->m_next->m_next->m_next->m_next->m_len =
		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
#endif
		m_set_rcvif(m, ifp);
		m->m_pkthdr.len = plen;

		val = rxd->rxd_control1;

		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
			/* Failed, recycle this mbuf */
#if RX_MODE == RX_MODE_1
			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
			rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
#endif
			XGE_RXSYNC(sc->sc_nextrx,
			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
			ifp->if_ierrors++;
			break;
		}

		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
			if (RXD_CTL1_L3CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4|M_CSUM_TCPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}
		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4|M_CSUM_UDPv6;
			if (RXD_CTL1_L4CSUM(val) != 0xffff)
				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
		}

		if_percpuq_enqueue(ifp->if_percpuq, m);

		if (++sc->sc_nextrx == NRXREAL)
			sc->sc_nextrx = 0;
	}

	return 0;
}

int
xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct xge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
			error = EINVAL;
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) {
			PIF_WCSR(RMAC_MAX_PYLD_LEN,
			    RMAC_PYLD_LEN(ifr->ifr_mtu));
			error = 0;
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->xena_media, cmd);
		break;

	default:
		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
			break;

		error = 0;

		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
			;
		else if (ifp->if_flags & IFF_RUNNING) {
			/* Change multicast list */
			xge_mcast_filter(sc);
		}
		break;
	}

	splx(s);
	return (error);
}

void
xge_mcast_filter(struct xge_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multi *enm;
	struct ether_multistep step;
	int i, numaddr = 1; /* first slot used for card unicast address */
	uint64_t val;

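	/*
	 * Program each multicast address into the RMAC address CAM;
	 * the hardware cannot match address ranges, so any range entry
	 * forces a fallback to ALLMULTI.
	 */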
	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
			/* Skip ranges */
			goto allmulti;
		}
		if (numaddr == MAX_MCAST_ADDR)
			goto allmulti;
		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
			val <<= 8;
			val |= enm->enm_addrlo[i];
		}
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(numaddr));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
		numaddr++;
		ETHER_NEXT_MULTI(step, enm);
	}
	/* set the remaining entries to the broadcast address */
	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
		    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(i));
		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
			;
	}
	ifp->if_flags &= ~IFF_ALLMULTI;
	return;

allmulti:
	/* Just receive everything with the multicast bit set */
	ifp->if_flags |= IFF_ALLMULTI;
	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE|
	    RMAC_ADDR_CMD_MEM_STR|RMAC_ADDR_CMD_MEM_OFF(1));
	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
		;
}

void
xge_start(struct ifnet *ifp)
{
	struct xge_softc *sc = ifp->if_softc;
	struct txd *txd = NULL; /* XXX - gcc */
	bus_dmamap_t dmp;
	struct mbuf *m;
	uint64_t par, lcr;
	int nexttx = 0, ntxd, error, i;

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	par = lcr = 0;
	for (;;) {
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;	/* out of packets */

		if (sc->sc_nexttx == sc->sc_lasttx)
			break;	/* No more space */

		nexttx = sc->sc_nexttx;
		dmp = sc->sc_txm[nexttx];

		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT)) != 0) {
			printf("%s: bus_dmamap_load_mbuf error %d\n",
			    XNAME, error);
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		txd = sc->sc_txd[nexttx];
		sc->sc_txb[nexttx] = m;
		for (i = 0; i < dmp->dm_nsegs; i++) {
			if (dmp->dm_segs[i].ds_len == 0)
				continue;
			txd->txd_control1 = dmp->dm_segs[i].ds_len;
			txd->txd_control2 = 0;
			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
			txd++;
		}
		ntxd = txd - sc->sc_txd[nexttx] - 1;
		txd = sc->sc_txd[nexttx];
		txd->txd_control1 |= TXD_CTL1_OWN|TXD_CTL1_GCF;
		txd->txd_control2 = TXD_CTL2_UTIL;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
			txd->txd_control1 |= TXD_CTL1_LSO;
		}

		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
			txd->txd_control2 |= TXD_CTL2_CIPv4;
		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
			txd->txd_control2 |= TXD_CTL2_CTCP;
		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
			txd->txd_control2 |= TXD_CTL2_CUDP;
		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;

		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

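		/*
		 * Hand the list to the hardware: write the bus address
		 * of the first descriptor (PAR) and a list-control word
		 * with the descriptor count (LCR) to the transmit FIFO.
		 */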
		par = sc->sc_txdp[nexttx];
		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
			lcr |= TXDL_SFF;
		TXP_WCSR(TXDL_PAR, par);
		TXP_WCSR(TXDL_LCR, lcr);

		bpf_mtap(ifp, m);

		sc->sc_nexttx = NEXTTX(nexttx);
	}
}

/*
 * Allocate DMA memory for transmit descriptor fragments.
 * Only one map is used for all descriptors.
 */
int
xge_alloc_txmem(struct xge_softc *sc)
{
	struct txd *txp;
	bus_dma_segment_t seg;
	bus_addr_t txdp;
	void *kva;
	int i, rseg, state;

#define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
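	/*
	 * The region holds NTXFRAGS contiguous descriptors for each of
	 * the NTXDESCS transmit slots; sc_txd[i] and sc_txdp[i] below
	 * step through it in NTXFRAGS-sized strides.
	 */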
	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_txmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup transmit array pointers */
	txdp = seg.ds_addr;
	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
		sc->sc_txd[i] = txp;
		sc->sc_txdp[i] = txdp;
		txp += NTXFRAGS;
		txdp += (NTXFRAGS * sizeof(struct txd));
	}

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Allocate DMA memory for the receive descriptors;
 * only one map is used for all descriptors.
 * Link the receive descriptor pages together.
 */
int
xge_alloc_rxmem(struct xge_softc *sc)
{
	struct rxd_4k *rxpp;
	bus_dma_segment_t seg;
	void *kva;
	int i, rseg, state;

	/* sanity check */
	if (sizeof(struct rxd_4k) != XGE_PAGE) {
		printf("bad compiler struct alignment, %d != %d\n",
		    (int)sizeof(struct rxd_4k), XGE_PAGE);
		return EINVAL;
	}

	state = 0;
	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
		goto err;
	state++;
	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
	    BUS_DMA_NOWAIT))
		goto err;

	state++;
	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
		goto err;
	state++;
	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
		goto err;

	/* setup receive page link pointers */
	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
		sc->sc_rxd_4k[i] = rxpp;
		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
	}
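	/*
	 * Close the ring: point the last page back at the first so the
	 * chip can keep following the chain.
	 */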
	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;

	return 0;

err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
	if (state > 0)
		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
	return ENOBUFS;
}

/*
 * Add a new mbuf chain to descriptor id.
 */
int
xge_add_rxbuf(struct xge_softc *sc, int id)
{
	struct rxdesc *rxd;
	struct mbuf *m[5];
	int page, desc, error;
#if RX_MODE == RX_MODE_5
	int i;
#endif

	page = id/NDESC_BUFMODE;
	desc = id%NDESC_BUFMODE;

	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];

	/*
	 * Allocate mbufs.
	 * In 5-buffer mode five mbufs and two clusters are used:
	 * the hardware puts the (ethernet, ip, tcp/udp) headers in
	 * their own buffers and the clusters are used only for data;
	 * in 1-buffer mode a single cluster mbuf holds the whole frame.
	 */
#if RX_MODE == RX_MODE_1
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	if (m[0] == NULL)
		return ENOBUFS;
	MCLGET(m[0], M_DONTWAIT);
	if ((m[0]->m_flags & M_EXT) == 0) {
		m_freem(m[0]);
		return ENOBUFS;
	}
	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
#elif RX_MODE == RX_MODE_3
#error missing rxmode 3.
#elif RX_MODE == RX_MODE_5
	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
	for (i = 1; i < 5; i++) {
		MGET(m[i], M_DONTWAIT, MT_DATA);
	}
	if (m[3])
		MCLGET(m[3], M_DONTWAIT);
	if (m[4])
		MCLGET(m[4], M_DONTWAIT);
	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
		/* Out of something */
		for (i = 0; i < 5; i++)
			if (m[i] != NULL)
				m_free(m[i]);
		return ENOBUFS;
	}
	/* Link'em together */
	m[0]->m_next = m[1];
	m[1]->m_next = m[2];
	m[2]->m_next = m[3];
	m[3]->m_next = m[4];
#else
#error bad mode RX_MODE
#endif

	if (sc->sc_rxb[id])
		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
	sc->sc_rxb[id] = m[0];

	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
	    BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (error)
		return error;
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);

#if RX_MODE == RX_MODE_1
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#elif RX_MODE == RX_MODE_3
#elif RX_MODE == RX_MODE_5
	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
	rxd->rxd_control1 = RXD_CTL1_OWN;
#endif

	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
	return 0;
}

/*
 * These magic values come from the FreeBSD driver.
 */
int
xge_setup_xgxs(struct xge_softc *sc)
{
	/* The magic numbers are described in the users guide */

	/* Writing to MDIO 0x8000 (Global Config 0) */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);

	/* Writing to MDIO 0x8000 (Global Config 1) */
	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);

	/* Reset the Gigablaze */
	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);

	/* read the pole settings */
	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);

	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);

	/*
	 * Workaround for TX Lane XAUI initialization error.
	 * Read Xpak PHY register 24 for XAUI lane status.
	 */
	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);

	/*
	 * Reading MDIO_CONTROL as 0x1804001c0F001c means the TxLanes
	 * were already in sync.
	 * Reading it as 0x1804000c0x001c means some TxLanes are not in
	 * sync, where x is a 4-bit value with one bit per lane.
	 */
#if 0
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0F001cULL) {
		printf("%s: MDIO_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x1804001c0F001cULL);
		return 1;
	}
#endif

	/* Set and remove the DTE XS INTLoopBackN */
	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);

#if 0
	/* Reading the DTX control register should be 0x5152040001c */
	val = PIF_RCSR(DTX_CONTROL);
	if (val != 0x5152040001cULL) {
		printf("%s: DTX_CONTROL: %llx != %llx\n",
		    XNAME, val, 0x5152040001cULL);
		return 1;
	}
#endif

	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);

#if 0
	/* Reading the MDIO control should be 0x1804001c0f001c */
	val = PIF_RCSR(MDIO_CONTROL);
	if (val != 0x1804001c0f001cULL) {
		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
		    XNAME, val, 0x1804001c0f001cULL);
		return 1;
	}
#endif
	return 0;
}
1309