1 /*      $NetBSD: if_xge.c,v 1.34 2020/03/01 15:54:18 thorpej Exp $ */
2 
3 /*
4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
5  * All rights reserved.
6  *
7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      SUNET, Swedish University Computer Network.
21  * 4. The name of SUNET may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 /*
38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
39  *
40  * TODO (in no specific order):
41  *	HW VLAN support.
42  *	IPv6 HW cksum.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.34 2020/03/01 15:54:18 thorpej Exp $");
47 
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/mbuf.h>
52 #include <sys/malloc.h>
53 #include <sys/kernel.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/device.h>
57 
58 #include <net/if.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_ether.h>
62 #include <net/bpf.h>
63 
64 #include <sys/bus.h>
65 #include <sys/intr.h>
66 #include <machine/endian.h>
67 
68 #include <dev/mii/mii.h>
69 #include <dev/mii/miivar.h>
70 
71 #include <dev/pci/pcivar.h>
72 #include <dev/pci/pcireg.h>
73 #include <dev/pci/pcidevs.h>
74 
75 #include <dev/pci/if_xgereg.h>
76 
77 /*
78  * Some tunable constants, tune with care!
79  */
80 #define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
81 #define NRXDESCS	1016	   /* # of receive descriptors (requested) */
82 #define NTXDESCS	8192	   /* Number of transmit descriptors */
83 #define NTXFRAGS	100	   /* Max fragments per packet */
84 #define XGE_EVENT_COUNTERS	   /* Instrumentation */
85 
86 /*
87  * Receive buffer modes; 1, 3 or 5 buffers.
88  */
89 #define RX_MODE_1 1
90 #define RX_MODE_3 3
91 #define RX_MODE_5 5
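/*
 * RX_MODE selects how many buffers each receive descriptor carries.
 * In 1-buffer mode the whole frame lands in a single cluster; in
 * 5-buffer mode the chip splits the (ethernet, ip, tcp/udp) headers
 * into their own small buffers and uses the clusters for payload
 * (see xge_add_rxbuf()).  Mode 3 is not implemented and hits an
 * #error below.
 */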
92 
93 /*
94  * Use clever macros to avoid a bunch of #ifdef's.
95  */
96 #define XCONCAT3(x, y, z) x ## y ## z
97 #define CONCAT3(x, y, z) XCONCAT3(x, y, z)
98 #define NDESC_BUFMODE CONCAT3(NDESC_, RX_MODE, BUFMODE)
99 #define rxd_4k CONCAT3(rxd, RX_MODE, _4k)
100 #define rxdesc ___CONCAT(rxd, RX_MODE)
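/*
 * The CONCAT3/XCONCAT3 indirection forces RX_MODE to expand to its
 * numeric value before token pasting.  For example, with RX_MODE set
 * to RX_MODE_1 (i.e. 1):
 *
 *	NDESC_BUFMODE -> NDESC_1BUFMODE
 *	rxd_4k        -> rxd1_4k
 *	rxdesc        -> rxd1
 *
 * all of which are expected to be declared in if_xgereg.h.
 */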
101 
102 #define NEXTTX(x)	(((x)+1) % NTXDESCS)
103 #define NRXFRAGS	RX_MODE /* hardware imposed frags */
104 #define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
105 #define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
106 #define RXMAPSZ		(NRXPAGES*PAGE_SIZE)
107 
108 #ifdef XGE_EVENT_COUNTERS
109 #define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
110 #else
111 #define XGE_EVCNT_INCR(ev)	/* nothing */
112 #endif
113 
114 /*
115  * Magic values to work around a bug where the MAC address can't
116  * be read correctly.  They come from the Linux driver.
117  */
118 static uint64_t fix_mac[] = {
119 	0x0060000000000000ULL, 0x0060600000000000ULL,
120 	0x0040600000000000ULL, 0x0000600000000000ULL,
121 	0x0020600000000000ULL, 0x0060600000000000ULL,
122 	0x0020600000000000ULL, 0x0060600000000000ULL,
123 	0x0020600000000000ULL, 0x0060600000000000ULL,
124 	0x0020600000000000ULL, 0x0060600000000000ULL,
125 	0x0020600000000000ULL, 0x0060600000000000ULL,
126 	0x0020600000000000ULL, 0x0060600000000000ULL,
127 	0x0020600000000000ULL, 0x0060600000000000ULL,
128 	0x0020600000000000ULL, 0x0060600000000000ULL,
129 	0x0020600000000000ULL, 0x0060600000000000ULL,
130 	0x0020600000000000ULL, 0x0060600000000000ULL,
131 	0x0020600000000000ULL, 0x0000600000000000ULL,
132 	0x0040600000000000ULL, 0x0060600000000000ULL,
133 };
134 
135 
136 struct xge_softc {
137 	device_t sc_dev;
138 	struct ethercom sc_ethercom;
139 #define sc_if sc_ethercom.ec_if
140 	bus_dma_tag_t sc_dmat;
141 	bus_space_tag_t sc_st;
142 	bus_space_handle_t sc_sh;
143 	bus_space_tag_t sc_txt;
144 	bus_space_handle_t sc_txh;
145 	void *sc_ih;
146 
147 	struct ifmedia xena_media;
148 	pcireg_t sc_pciregs[16];
149 
150 	/* Transmit structures */
151 	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
152 	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
153 	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
154 	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
155 	int sc_nexttx, sc_lasttx;
156 	bus_dmamap_t sc_txmap;		/* transmit descriptor map */
157 
158 	/* Receive data */
159 	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
160 	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
161 	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
162 	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
163 	int sc_nextrx;			/* next descriptor to check */
164 
165 #ifdef XGE_EVENT_COUNTERS
166 	struct evcnt sc_intr;	/* # of interrupts */
167 	struct evcnt sc_txintr;	/* # of transmit interrupts */
168 	struct evcnt sc_rxintr;	/* # of receive interrupts */
169 	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
170 #endif
171 };
172 
173 static int xge_match(device_t parent, cfdata_t cf, void *aux);
174 static void xge_attach(device_t parent, device_t self, void *aux);
175 static int xge_alloc_txmem(struct xge_softc *);
176 static int xge_alloc_rxmem(struct xge_softc *);
177 static void xge_start(struct ifnet *);
178 static void xge_stop(struct ifnet *, int);
179 static int xge_add_rxbuf(struct xge_softc *, int);
180 static void xge_mcast_filter(struct xge_softc *sc);
181 static int xge_setup_xgxs(struct xge_softc *sc);
182 static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
183 static int xge_init(struct ifnet *ifp);
184 static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
185 static int xge_xgmii_mediachange(struct ifnet *);
186 static int xge_intr(void  *);
187 
188 /*
189  * Helpers to address registers.
190  */
191 #define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
192 #define PIF_RCSR(csr)		pif_rcsr(sc, csr)
193 #define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
194 #define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
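/*
 * All of these registers are 64 bits wide but the chip is accessed
 * with 32-bit bus_space operations, so each helper writes or reads
 * the low word at "csr" and the high word at "csr + 4".  pif_wkey()
 * additionally rewrites RMAC_CFG_KEY before each 32-bit half,
 * presumably because the keyed MAC configuration registers expect
 * the key before every access.
 */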
195 
196 static inline void
197 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
198 {
199 	uint32_t lval, hval;
200 
201 	lval = val&0xffffffff;
202 	hval = val>>32;
203 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
204 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
205 }
206 
207 static inline uint64_t
208 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
209 {
210 	uint64_t val, val2;
211 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
212 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
213 	val |= (val2 << 32);
214 	return val;
215 }
216 
217 static inline void
218 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
219 {
220 	uint32_t lval, hval;
221 
222 	lval = val&0xffffffff;
223 	hval = val>>32;
224 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
225 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
226 }
227 
228 
229 static inline void
230 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
231 {
232 	uint32_t lval, hval;
233 
234 	lval = val&0xffffffff;
235 	hval = val>>32;
236 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
237 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
238 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
239 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
240 }
241 
242 
243 CFATTACH_DECL_NEW(xge, sizeof(struct xge_softc),
244     xge_match, xge_attach, NULL, NULL);
245 
246 #define XNAME device_xname(sc->sc_dev)
247 
248 #define XGE_RXSYNC(desc, what) \
249 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
250 	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
251 	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
252 #define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
253 	r4_rxd[desc%NDESC_BUFMODE]
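/*
 * Receive descriptors live in 4k pages (struct rxd_4k, one XGE_PAGE
 * each), so descriptor index "desc" maps to page desc/NDESC_BUFMODE
 * and entry desc%NDESC_BUFMODE within that page; XGE_RXSYNC() uses
 * the same arithmetic to compute the offset into the single receive
 * DMA map.
 */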
254 
255 /*
256  * Non-tunable constants.
257  */
258 #define XGE_MAX_MTU		9600
259 #define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
260 
261 static int
262 xge_match(device_t parent, cfdata_t cf, void *aux)
263 {
264 	struct pci_attach_args *pa = aux;
265 
266 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
267 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
268 		return 1;
269 
270 	return 0;
271 }
272 
273 void
274 xge_attach(device_t parent, device_t self, void *aux)
275 {
276 	struct pci_attach_args *pa = aux;
277 	struct xge_softc *sc;
278 	struct ifnet *ifp;
279 	pcireg_t memtype;
280 	pci_intr_handle_t ih;
281 	const char *intrstr = NULL;
282 	pci_chipset_tag_t pc = pa->pa_pc;
283 	uint8_t enaddr[ETHER_ADDR_LEN];
284 	uint64_t val;
285 	int i;
286 	char intrbuf[PCI_INTRSTR_LEN];
287 
288 	sc = device_private(self);
289 	sc->sc_dev = self;
290 
291 	if (pci_dma64_available(pa))
292 		sc->sc_dmat = pa->pa_dmat64;
293 	else
294 		sc->sc_dmat = pa->pa_dmat;
295 
296 	/* Get BAR0 address */
297 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
298 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
299 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
300 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
301 		return;
302 	}
303 
304 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
305 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
306 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
307 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
308 		return;
309 	}
310 
311 	/* Save PCI config space */
312 	for (i = 0; i < 64; i += 4)
313 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
314 
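	/*
	 * Set up the byte swapper: on little-endian hosts enable every
	 * swap bit except TxF_R_SE/RxF_W_SE, then verify the result by
	 * reading back SWAPPER_MAGIC.  The same dance is repeated after
	 * the chip reset below.
	 */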
315 #if BYTE_ORDER == LITTLE_ENDIAN
316 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
317 	val &= ~(TxF_R_SE | RxF_W_SE);
318 	PIF_WCSR(SWAPPER_CTRL, val);
319 	PIF_WCSR(SWAPPER_CTRL, val);
320 #elif BYTE_ORDER == BIG_ENDIAN
321 	/* do nothing */
322 #else
323 #error bad endianness!
324 #endif
325 
326 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
327 		aprint_error("%s: failed configuring endian, %llx != %llx!\n",
328 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
329 		return;
330 	}
331 
332 	/*
333 	 * The MAC addr may be all FF's, which is not good.
334 	 * Resolve it by writing some magics to GPIO_CONTROL and
335 	 * forcing a chip reset to read in the serial EEPROM again.
336 	 */
337 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
338 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
339 		PIF_RCSR(GPIO_CONTROL);
340 	}
341 
342 	/*
343 	 * Reset the chip and restore the PCI registers.
344 	 */
345 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
346 	DELAY(500000);
347 	for (i = 0; i < 64; i += 4)
348 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
349 
350 	/*
351 	 * Restore the byte order registers.
352 	 */
353 #if BYTE_ORDER == LITTLE_ENDIAN
354 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
355 	val &= ~(TxF_R_SE | RxF_W_SE);
356 	PIF_WCSR(SWAPPER_CTRL, val);
357 	PIF_WCSR(SWAPPER_CTRL, val);
358 #elif BYTE_ORDER == BIG_ENDIAN
359 	/* do nothing */
360 #else
361 #error bad endianness!
362 #endif
363 
364 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
365 		aprint_error("%s: failed configuring endian2, %llx != %llx!\n",
366 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
367 		return;
368 	}
369 
370 	/*
371 	 * XGXS initialization.
372 	 */
373 	/* 29, reset */
374 	PIF_WCSR(SW_RESET, 0);
375 	DELAY(500000);
376 
377 	/* 30, configure XGXS transceiver */
378 	xge_setup_xgxs(sc);
379 
380 	/* 33, program MAC address (not needed here) */
381 	/* Get ethernet address */
382 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
383 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(0));
384 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
385 		;
386 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
387 	for (i = 0; i < ETHER_ADDR_LEN; i++)
388 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
389 
390 	/*
391 	 * Get memory for transmit descriptor lists.
392 	 */
393 	if (xge_alloc_txmem(sc)) {
394 		aprint_error("%s: failed allocating txmem.\n", XNAME);
395 		return;
396 	}
397 
398 	/* 9 and 10 - set FIFO number/prio */
399 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
400 	PIF_WCSR(TX_FIFO_P1, 0ULL);
401 	PIF_WCSR(TX_FIFO_P2, 0ULL);
402 	PIF_WCSR(TX_FIFO_P3, 0ULL);
403 
404 	/* 11, XXX set round-robin prio? */
405 
406 	/* 12, enable transmit FIFO */
407 	val = PIF_RCSR(TX_FIFO_P0);
408 	val |= TX_FIFO_ENABLE;
409 	PIF_WCSR(TX_FIFO_P0, val);
410 
411 	/* 13, disable some error checks */
412 	PIF_WCSR(TX_PA_CFG,
413 	    TX_PA_CFG_IFR | TX_PA_CFG_ISO | TX_PA_CFG_ILC | TX_PA_CFG_ILE);
414 
415 	/*
416 	 * Create transmit DMA maps.
417 	 * Make them large for TSO.
418 	 */
419 	for (i = 0; i < NTXDESCS; i++) {
420 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
421 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i])) {
422 			aprint_error("%s: cannot create TX DMA maps\n", XNAME);
423 			return;
424 		}
425 	}
426 
427 	sc->sc_lasttx = NTXDESCS-1;
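	/*
	 * Transmit ring convention: sc_nexttx is the next free slot and
	 * sc_lasttx is the last reclaimed one, so the ring is empty when
	 * NEXTTX(sc_lasttx) == sc_nexttx and treated as full when
	 * sc_nexttx == sc_lasttx (one slot is always left unused).
	 */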
428 
429 	/*
430 	 * RxDMA initialization.
431 	 * Only use one out of 8 possible receive queues.
432 	 */
433 	if (xge_alloc_rxmem(sc)) {	/* allocate rx descriptor memory */
434 		aprint_error("%s: failed allocating rxmem\n", XNAME);
435 		return;
436 	}
437 
438 	/* Create receive buffer DMA maps */
439 	for (i = 0; i < NRXREAL; i++) {
440 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
441 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i])) {
442 			aprint_error("%s: cannot create RX DMA maps\n", XNAME);
443 			return;
444 		}
445 	}
446 
447 	/* allocate mbufs to receive descriptors */
448 	for (i = 0; i < NRXREAL; i++)
449 		if (xge_add_rxbuf(sc, i))
450 			panic("out of mbufs too early");
451 
452 	/* 14, setup receive ring priority */
453 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
454 
455 	/* 15, setup receive ring round-robin calendar */
456 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
457 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
458 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
459 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
460 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
461 
462 	/* 16, write receive ring start address */
463 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
464 	/* PRC_RXD0_[1-7] are not used */
465 
466 	/* 17, Setup alarm registers */
467 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
468 
469 	/* 18, init receive ring controller */
470 #if RX_MODE == RX_MODE_1
471 	val = RING_MODE_1;
472 #elif RX_MODE == RX_MODE_3
473 	val = RING_MODE_3;
474 #else /* RX_MODE == RX_MODE_5 */
475 	val = RING_MODE_5;
476 #endif
477 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC | val);
478 	/* leave 1-7 disabled */
479 	/* XXXX snoop configuration? */
480 
481 	/* 19, set chip memory assigned to the queue */
482 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
483 
484 	/* 20, setup RLDRAM parameters */
485 	/* do not touch it for now */
486 
487 	/* 21, setup pause frame thresholds */
488 	/* do not touch the defaults */
489 	/* XXX - must 0xff be written as stated in the manual? */
490 
491 	/* 22, configure RED */
492 	/* we do not want to drop packets, so ignore */
493 
494 	/* 23, initiate RLDRAM */
495 	val = PIF_RCSR(MC_RLDRAM_MRS);
496 	val |= MC_QUEUE_SIZE_ENABLE | MC_RLDRAM_MRS_ENABLE;
497 	PIF_WCSR(MC_RLDRAM_MRS, val);
498 	DELAY(1000);
499 
500 	/*
501 	 * Setup interrupt policies.
502 	 */
503 	/* 40, Transmit interrupts */
504 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
505 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
506 	PIF_WCSR(TTI_DATA2_MEM,
507 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
508 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
509 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
510 		;
511 
512 	/* 41, Receive interrupts */
513 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
514 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
515 	PIF_WCSR(RTI_DATA2_MEM,
516 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
517 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
518 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
519 		;
520 
521 	/*
522 	 * Setup media stuff.
523 	 */
524 	sc->sc_ethercom.ec_ifmedia = &sc->xena_media;
525 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
526 	    xge_ifmedia_status);
527 	ifmedia_add(&sc->xena_media, IFM_ETHER | IFM_10G_LR, 0, NULL);
528 	ifmedia_set(&sc->xena_media, IFM_ETHER | IFM_10G_LR);
529 
530 	aprint_normal("%s: Ethernet address %s\n", XNAME,
531 	    ether_sprintf(enaddr));
532 
533 	ifp = &sc->sc_ethercom.ec_if;
534 	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
535 	ifp->if_baudrate = 10000000000LL;
536 	ifp->if_init = xge_init;
537 	ifp->if_stop = xge_stop;
538 	ifp->if_softc = sc;
539 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
540 	ifp->if_ioctl = xge_ioctl;
541 	ifp->if_start = xge_start;
542 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(NTXDESCS - 1, IFQ_MAXLEN));
543 	IFQ_SET_READY(&ifp->if_snd);
544 
545 	/*
546 	 * Offloading capabilities.
547 	 */
548 	sc->sc_ethercom.ec_capabilities |=
549 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
550 	ifp->if_capabilities |=
551 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
552 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
553 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
554 
555 	/*
556 	 * Attach the interface.
557 	 */
558 	if_attach(ifp);
559 	if_deferred_start_init(ifp, NULL);
560 	ether_ifattach(ifp, enaddr);
561 
562 	/*
563 	 * Setup interrupt vector before initializing.
564 	 */
565 	if (pci_intr_map(pa, &ih)) {
566 		aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
567 		return;
568 	}
569 	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
570 	sc->sc_ih = pci_intr_establish_xname(pc, ih, IPL_NET, xge_intr, sc,
571 	    device_xname(self));
572 	if (sc->sc_ih == NULL) {
573 		aprint_error_dev(sc->sc_dev,
574 		    "unable to establish interrupt at %s\n",
575 		    intrstr ? intrstr : "<unknown>");
576 		return;
577 	}
578 	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
579 
580 #ifdef XGE_EVENT_COUNTERS
581 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
582 	    NULL, XNAME, "intr");
583 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
584 	    NULL, XNAME, "txintr");
585 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
586 	    NULL, XNAME, "rxintr");
587 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
588 	    NULL, XNAME, "txqe");
589 #endif
590 }
591 
592 void
593 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
594 {
595 	struct xge_softc *sc = ifp->if_softc;
596 	uint64_t reg;
597 
598 	ifmr->ifm_status = IFM_AVALID;
599 	ifmr->ifm_active = IFM_ETHER | IFM_10G_LR;
600 
601 	reg = PIF_RCSR(ADAPTER_STATUS);
602 	if ((reg & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
603 		ifmr->ifm_status |= IFM_ACTIVE;
604 }
605 
606 int
607 xge_xgmii_mediachange(struct ifnet *ifp)
608 {
609 	return 0;
610 }
611 
612 static void
613 xge_enable(struct xge_softc *sc)
614 {
615 	uint64_t val;
616 
617 	/* 2, enable adapter */
618 	val = PIF_RCSR(ADAPTER_CONTROL);
619 	val |= ADAPTER_EN;
620 	PIF_WCSR(ADAPTER_CONTROL, val);
621 
622 	/* 3, light the card enable led */
623 	val = PIF_RCSR(ADAPTER_CONTROL);
624 	val |= LED_ON;
625 	PIF_WCSR(ADAPTER_CONTROL, val);
626 	printf("%s: link up\n", XNAME);
627 
628 }
629 
630 int
631 xge_init(struct ifnet *ifp)
632 {
633 	struct xge_softc *sc = ifp->if_softc;
634 	uint64_t val;
635 
636 	if (ifp->if_flags & IFF_RUNNING)
637 		return 0;
638 
639 	/* 31+32, setup MAC config */
640 	PIF_WKEY(MAC_CFG, TMAC_EN | RMAC_EN | TMAC_APPEND_PAD |
641 	    RMAC_STRIP_FCS | RMAC_BCAST_EN | RMAC_DISCARD_PFRM | RMAC_PROM_EN);
642 
643 	DELAY(1000);
644 
645 	/* 54, ensure that the adapter is 'quiescent' */
646 	val = PIF_RCSR(ADAPTER_STATUS);
647 	if ((val & QUIESCENT) != QUIESCENT) {
648 		char buf[200];
649 		printf("%s: adapter not quiescent, aborting\n", XNAME);
650 		val = (val & QUIESCENT) ^ QUIESCENT;
651 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
652 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
653 		return 1;
654 	}
655 
656 	/* 56, enable the transmit laser */
657 	val = PIF_RCSR(ADAPTER_CONTROL);
658 	val |= EOI_TX_ON;
659 	PIF_WCSR(ADAPTER_CONTROL, val);
660 
661 	xge_enable(sc);
662 	/*
663 	 * Enable all interrupts
664 	 */
665 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
666 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
667 	PIF_WCSR(GENERAL_INT_MASK, 0);
668 	PIF_WCSR(TXPIC_INT_MASK, 0);
669 	PIF_WCSR(RXPIC_INT_MASK, 0);
670 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
671 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
672 
673 
674 	/* Done... */
675 	ifp->if_flags |= IFF_RUNNING;
676 	ifp->if_flags &= ~IFF_OACTIVE;
677 
678 	return 0;
679 }
680 
681 static void
682 xge_stop(struct ifnet *ifp, int disable)
683 {
684 	struct xge_softc *sc = ifp->if_softc;
685 	uint64_t val;
686 
687 	val = PIF_RCSR(ADAPTER_CONTROL);
688 	val &= ~ADAPTER_EN;
689 	PIF_WCSR(ADAPTER_CONTROL, val);
690 
691 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
692 		;
693 }
694 
695 int
696 xge_intr(void *pv)
697 {
698 	struct xge_softc *sc = pv;
699 	struct txd *txd;
700 	struct ifnet *ifp = &sc->sc_if;
701 	bus_dmamap_t dmp;
702 	uint64_t val;
703 	int i, lasttx, plen;
704 
705 	val = PIF_RCSR(GENERAL_INT_STATUS);
706 	if (val == 0)
707 		return 0; /* no interrupt here */
708 
709 	XGE_EVCNT_INCR(&sc->sc_intr);
710 
711 	PIF_WCSR(GENERAL_INT_STATUS, val);
712 
713 	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
714 		/* Wait for quiescence */
715 		printf("%s: link down\n", XNAME);
716 		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
717 			;
718 		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);
719 
720 		val = PIF_RCSR(ADAPTER_STATUS);
721 		if ((val & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
722 			xge_enable(sc); /* Only if link restored */
723 	}
724 
725 	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
726 		XGE_EVCNT_INCR(&sc->sc_txintr);
727 		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
728 	}
729 	/*
730 	 * Collect sent packets.
731 	 */
732 	lasttx = sc->sc_lasttx;
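	/*
	 * A descriptor still owned by the NIC (TXD_CTL1_OWN set) marks
	 * the end of the completed packets; it is re-synced and left for
	 * the next interrupt.
	 */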
733 	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
734 		txd = sc->sc_txd[i];
735 		dmp = sc->sc_txm[i];
736 
737 		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
738 		    dmp->dm_mapsize,
739 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
740 
741 		if (txd->txd_control1 & TXD_CTL1_OWN) {
742 			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
743 			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
744 			break;
745 		}
746 		bus_dmamap_unload(sc->sc_dmat, dmp);
747 		m_freem(sc->sc_txb[i]);
748 		if_statinc(ifp, if_opackets);
749 		sc->sc_lasttx = i;
750 	}
751 	if (i == sc->sc_nexttx) {
752 		XGE_EVCNT_INCR(&sc->sc_txqe);
753 	}
754 
755 	if (sc->sc_lasttx != lasttx)
756 		ifp->if_flags &= ~IFF_OACTIVE;
757 
758 	/* Try to get more packets on the wire */
759 	if_schedule_deferred_start(ifp);
760 
761 	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
762 		XGE_EVCNT_INCR(&sc->sc_rxintr);
763 		PIF_WCSR(RX_TRAFFIC_INT, val); /* Clear interrupt bits */
764 	}
765 
766 	for (;;) {
767 		struct rxdesc *rxd;
768 		struct mbuf *m;
769 
770 		XGE_RXSYNC(sc->sc_nextrx,
771 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
772 
773 		rxd = XGE_RXD(sc->sc_nextrx);
774 		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
775 			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
776 			break;
777 		}
778 
779 		/* Got a packet */
780 		m = sc->sc_rxb[sc->sc_nextrx];
781 #if RX_MODE == RX_MODE_1
782 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
783 #elif RX_MODE == RX_MODE_3
784 #error Fix rxmodes in xge_intr
785 #elif RX_MODE == RX_MODE_5
786 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
787 		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
788 		plen += m->m_next->m_next->m_len =
789 		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
790 		plen += m->m_next->m_next->m_next->m_len =
791 		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
792 		plen += m->m_next->m_next->m_next->m_next->m_len =
793 		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
794 #endif
795 		m_set_rcvif(m, ifp);
796 		m->m_pkthdr.len = plen;
797 
798 		val = rxd->rxd_control1;
799 
800 		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
801 			/* Failed, recycle this mbuf */
802 #if RX_MODE == RX_MODE_1
803 			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
804 			rxd->rxd_control1 = RXD_CTL1_OWN;
805 #elif RX_MODE == RX_MODE_3
806 #elif RX_MODE == RX_MODE_5
807 #endif
808 			XGE_RXSYNC(sc->sc_nextrx,
809 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
810 			if_statinc(ifp, if_ierrors);
811 			break;
812 		}
813 
814 		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
815 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
816 			if (RXD_CTL1_L3CSUM(val) != 0xffff)
817 				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
818 		}
819 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
820 			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4 | M_CSUM_TCPv6;
821 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
822 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
823 		}
824 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
825 			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4 | M_CSUM_UDPv6;
826 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
827 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
828 		}
829 
830 		if_percpuq_enqueue(ifp->if_percpuq, m);
831 
832 		if (++sc->sc_nextrx == NRXREAL)
833 			sc->sc_nextrx = 0;
834 
835 	}
836 
837 	return 0;
838 }
839 
840 int
841 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
842 {
843 	struct xge_softc *sc = ifp->if_softc;
844 	struct ifreq *ifr = (struct ifreq *) data;
845 	int s, error = 0;
846 
847 	s = splnet();
848 
849 	switch (cmd) {
850 	case SIOCSIFMTU:
851 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
852 			error = EINVAL;
853 		else if ((error = ifioctl_common(ifp, cmd, data))
854 		    == ENETRESET) {
855 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
856 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
857 			error = 0;
858 		}
859 		break;
860 
861 	default:
862 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
863 			break;
864 
865 		error = 0;
866 
867 		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
868 			;
869 		else if (ifp->if_flags & IFF_RUNNING) {
870 			/* Change multicast list */
871 			xge_mcast_filter(sc);
872 		}
873 		break;
874 	}
875 
876 	splx(s);
877 	return error;
878 }
879 
880 void
881 xge_mcast_filter(struct xge_softc *sc)
882 {
883 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
884 	struct ethercom *ec = &sc->sc_ethercom;
885 	struct ether_multi *enm;
886 	struct ether_multistep step;
887 	int i, numaddr = 1; /* first slot used for card unicast address */
888 	uint64_t val;
889 
890 	ETHER_LOCK(ec);
891 	ETHER_FIRST_MULTI(step, ec, enm);
892 	while (enm != NULL) {
893 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
894 			/* Skip ranges */
895 			ETHER_UNLOCK(ec);
896 			goto allmulti;
897 		}
898 		if (numaddr == MAX_MCAST_ADDR) {
899 			ETHER_UNLOCK(ec);
900 			goto allmulti;
901 		}
902 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
903 			val <<= 8;
904 			val |= enm->enm_addrlo[i];
905 		}
906 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
907 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
908 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
909 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(numaddr));
910 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
911 			;
912 		numaddr++;
913 		ETHER_NEXT_MULTI(step, enm);
914 	}
915 	ETHER_UNLOCK(ec);
916 	/* set the remaining entries to the broadcast address */
917 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
918 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
919 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
920 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
921 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(i));
922 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
923 			;
924 	}
925 	ifp->if_flags &= ~IFF_ALLMULTI;
926 	return;
927 
928 allmulti:
929 	/* Just receive everything with the multicast bit set */
930 	ifp->if_flags |= IFF_ALLMULTI;
931 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
932 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
933 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
934 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(1));
935 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
936 		;
937 }
938 
939 void
940 xge_start(struct ifnet *ifp)
941 {
942 	struct xge_softc *sc = ifp->if_softc;
943 	struct txd *txd = NULL; /* XXX - gcc */
944 	bus_dmamap_t dmp;
945 	struct	mbuf *m;
946 	uint64_t par, lcr;
947 	int nexttx = 0, ntxd, error, i;
948 
949 	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
950 		return;
951 
952 	par = lcr = 0;
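	/*
	 * For each packet: load the mbuf chain into the DMA map, build
	 * one transmit descriptor per DMA segment, mark the first
	 * descriptor with TXD_CTL1_GCF and the last with TXD_CTL1_GCL,
	 * then hand the list to the FIFO by writing its bus address to
	 * TXDL_PAR and its length/flags to TXDL_LCR.
	 */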
953 	for (;;) {
954 		IFQ_POLL(&ifp->if_snd, m);
955 		if (m == NULL)
956 			break;	/* out of packets */
957 
958 		if (sc->sc_nexttx == sc->sc_lasttx)
959 			break;	/* No more space */
960 
961 		nexttx = sc->sc_nexttx;
962 		dmp = sc->sc_txm[nexttx];
963 
964 		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
965 		    BUS_DMA_WRITE | BUS_DMA_NOWAIT)) != 0) {
966 			printf("%s: bus_dmamap_load_mbuf error %d\n",
967 			    XNAME, error);
968 			break;
969 		}
970 		IFQ_DEQUEUE(&ifp->if_snd, m);
971 
972 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
973 		    BUS_DMASYNC_PREWRITE);
974 
975 		txd = sc->sc_txd[nexttx];
976 		sc->sc_txb[nexttx] = m;
977 		for (i = 0; i < dmp->dm_nsegs; i++) {
978 			if (dmp->dm_segs[i].ds_len == 0)
979 				continue;
980 			txd->txd_control1 = dmp->dm_segs[i].ds_len;
981 			txd->txd_control2 = 0;
982 			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
983 			txd++;
984 		}
985 		ntxd = txd - sc->sc_txd[nexttx] - 1;
986 		txd = sc->sc_txd[nexttx];
987 		txd->txd_control1 |= TXD_CTL1_OWN | TXD_CTL1_GCF;
988 		txd->txd_control2 = TXD_CTL2_UTIL;
989 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
990 			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
991 			txd->txd_control1 |= TXD_CTL1_LSO;
992 		}
993 
994 		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
995 			txd->txd_control2 |= TXD_CTL2_CIPv4;
996 		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
997 			txd->txd_control2 |= TXD_CTL2_CTCP;
998 		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
999 			txd->txd_control2 |= TXD_CTL2_CUDP;
1000 		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;
1001 
1002 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
1003 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1004 
1005 		par = sc->sc_txdp[nexttx];
1006 		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
1007 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
1008 			lcr |= TXDL_SFF;
1009 		TXP_WCSR(TXDL_PAR, par);
1010 		TXP_WCSR(TXDL_LCR, lcr);
1011 
1012 		bpf_mtap(ifp, m, BPF_D_OUT);
1013 
1014 		sc->sc_nexttx = NEXTTX(nexttx);
1015 	}
1016 }
1017 
1018 /*
1019  * Allocate DMA memory for transmit descriptor fragments.
1020  * Only one map is used for all descriptors.
1021  */
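/*
 * The area is carved into NTXDESCS contiguous lists of NTXFRAGS
 * descriptors each; sc_txd[i] and sc_txdp[i] record the kernel
 * virtual and bus address of list i.
 */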
1022 int
1023 xge_alloc_txmem(struct xge_softc *sc)
1024 {
1025 	struct txd *txp;
1026 	bus_dma_segment_t seg;
1027 	bus_addr_t txdp;
1028 	void *kva;
1029 	int i, rseg, state;
1030 
1031 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
1032 	state = 0;
1033 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
1034 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1035 		goto err;
1036 	state++;
1037 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
1038 	    BUS_DMA_NOWAIT))
1039 		goto err;
1040 
1041 	state++;
1042 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
1043 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
1044 		goto err;
1045 	state++;
1046 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
1047 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
1048 		goto err;
1049 
1050 	/* setup transmit array pointers */
1051 	txp = (struct txd *)kva;
1052 	txdp = seg.ds_addr;
1053 	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
1054 		sc->sc_txd[i] = txp;
1055 		sc->sc_txdp[i] = txdp;
1056 		txp += NTXFRAGS;
1057 		txdp += (NTXFRAGS * sizeof(struct txd));
1058 	}
1059 
1060 	return 0;
1061 
1062 err:
1063 	if (state > 2)
1064 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1065 	if (state > 1)
1066 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1067 	if (state > 0)
1068 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1069 	return ENOBUFS;
1070 }
1071 
1072 /*
1073  * Allocate DMA memory for the receive descriptors;
1074  * only one map is used for all descriptors.
1075  * Link the receive descriptor pages together into a ring.
1076  */
1077 int
1078 xge_alloc_rxmem(struct xge_softc *sc)
1079 {
1080 	struct rxd_4k *rxpp;
1081 	bus_dma_segment_t seg;
1082 	void *kva;
1083 	int i, rseg, state;
1084 
1085 	/* sanity check */
1086 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
1087 		printf("bad compiler struct alignment, %d != %d\n",
1088 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
1089 		return EINVAL;
1090 	}
1091 
1092 	state = 0;
1093 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
1094 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1095 		goto err;
1096 	state++;
1097 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
1098 	    BUS_DMA_NOWAIT))
1099 		goto err;
1100 
1101 	state++;
1102 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
1103 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
1104 		goto err;
1105 	state++;
1106 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
1107 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
1108 		goto err;
1109 
1110 	/* setup receive page link pointers */
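	/*
	 * Each 4k page ends with a link (r4_next) holding the bus address
	 * of the next page; the last page points back to the first, so
	 * the descriptor pages form a ring.
	 */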
1111 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
1112 		sc->sc_rxd_4k[i] = rxpp;
1113 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
1114 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
1115 	}
1116 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
1117 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
1118 
1119 	return 0;
1120 
1121 err:
1122 	if (state > 2)
1123 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
1124 	if (state > 1)
1125 		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
1126 	if (state > 0)
1127 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1128 	return ENOBUFS;
1129 }
1130 
1131 
1132 /*
1133  * Add a new mbuf chain to descriptor id.
1134  */
1135 int
1136 xge_add_rxbuf(struct xge_softc *sc, int id)
1137 {
1138 	struct rxdesc *rxd;
1139 	struct mbuf *m[5];
1140 	int page, desc, error;
1141 #if RX_MODE == RX_MODE_5
1142 	int i;
1143 #endif
1144 
1145 	page = id/NDESC_BUFMODE;
1146 	desc = id%NDESC_BUFMODE;
1147 
1148 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
1149 
1150 	/*
1151 	 * Allocate mbufs.
1152 	 * In 5-buffer mode five mbufs and two clusters are used: the chip
1153 	 * puts the (ethernet, ip, tcp/udp) headers in their own mbufs and
1154 	 * uses the clusters for data; 1-buffer mode uses a single cluster.
1155 	 */
1156 #if RX_MODE == RX_MODE_1
1157 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1158 	if (m[0] == NULL)
1159 		return ENOBUFS;
1160 	MCLGET(m[0], M_DONTWAIT);
1161 	if ((m[0]->m_flags & M_EXT) == 0) {
1162 		m_freem(m[0]);
1163 		return ENOBUFS;
1164 	}
1165 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
1166 #elif RX_MODE == RX_MODE_3
1167 #error missing rxmode 3.
1168 #elif RX_MODE == RX_MODE_5
1169 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1170 	for (i = 1; i < 5; i++) {
1171 		MGET(m[i], M_DONTWAIT, MT_DATA);
1172 	}
1173 	if (m[3])
1174 		MCLGET(m[3], M_DONTWAIT);
1175 	if (m[4])
1176 		MCLGET(m[4], M_DONTWAIT);
1177 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
1178 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
1179 		/* Out of something */
1180 		for (i = 0; i < 5; i++)
1181 			if (m[i] != NULL)
1182 				m_free(m[i]);
1183 		return ENOBUFS;
1184 	}
1185 	/* Link'em together */
1186 	m[0]->m_next = m[1];
1187 	m[1]->m_next = m[2];
1188 	m[2]->m_next = m[3];
1189 	m[3]->m_next = m[4];
1190 #else
1191 #error bad mode RX_MODE
1192 #endif
1193 
1194 	if (sc->sc_rxb[id])
1195 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
1196 	sc->sc_rxb[id] = m[0];
1197 
1198 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
1199 	    BUS_DMA_READ | BUS_DMA_NOWAIT);
1200 	if (error)
1201 		return error;
1202 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
1203 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
1204 
1205 #if RX_MODE == RX_MODE_1
1206 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
1207 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1208 	rxd->rxd_control1 = RXD_CTL1_OWN;
1209 #elif RX_MODE == RX_MODE_3
1210 #elif RX_MODE == RX_MODE_5
1211 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
1212 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
1213 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1214 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
1215 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
1216 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
1217 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
1218 	rxd->rxd_control1 = RXD_CTL1_OWN;
1219 #endif
1220 
1221 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1222 	return 0;
1223 }
1224 
1225 /*
1226  * These magics come from the FreeBSD driver.
1227  */
1228 int
1229 xge_setup_xgxs(struct xge_softc *sc)
1230 {
1231 	/* The magic numbers are described in the user's guide */
1232 
1233 	/* Writing to MDIO 0x8000 (Global Config 0) */
1234 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1235 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
1236 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
1237 
1238 	/* Writing to MDIO 0x8000 (Global Config 1) */
1239 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1240 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1241 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
1242 
1243 	/* Reset the Gigablaze */
1244 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1245 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
1246 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
1247 
1248 	/* read the pole settings */
1249 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1250 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
1251 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
1252 
1253 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1254 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1255 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
1256 
1257 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1258 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
1259 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
1260 
1261 	/* Workaround for TX Lane XAUI initialization error.
1262 	   Read Xpak PHY register 24 for XAUI lane status */
1263 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
1264 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1265 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
1266 
1267 	/*
1268 	 * Reading the MDIO control with value 0x1804001c0F001c
1269 	 * means the TxLanes were already in sync.
1270 	 * Reading the MDIO control with value 0x1804000c0x001c
1271 	 * means some TxLanes are not in sync, where x is a 4-bit
1272 	 * value representing the individual lanes.
1273 	 */
1274 #if 0
1275 	val = PIF_RCSR(MDIO_CONTROL);
1276 	if (val != 0x1804001c0F001cULL) {
1277 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
1278 		    XNAME, val, 0x1804001c0F001cULL);
1279 		return 1;
1280 	}
1281 #endif
1282 
1283 	/* Set and remove the DTE XS INTLoopBackN */
1284 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
1285 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
1286 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
1287 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
1288 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
1289 
1290 #if 0
1291 	/* Reading the DTX control register should be 0x5152040001c */
1292 	val = PIF_RCSR(DTX_CONTROL);
1293 	if (val != 0x5152040001cULL) {
1294 		printf("%s: DTX_CONTROL: %llx != %llx\n",
1295 		    XNAME, val, 0x5152040001cULL);
1296 		return 1;
1297 	}
1298 #endif
1299 
1300 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
1301 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1302 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
1303 
1304 #if 0
1305 	/* Reading the MDIO control should be 0x1804001c0f001c */
1306 	val = PIF_RCSR(MDIO_CONTROL);
1307 	if (val != 0x1804001c0f001cULL) {
1308 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
1309 		    XNAME, val, 0x1804001c0f001cULL);
1310 		return 1;
1311 	}
1312 #endif
1313 	return 0;
1314 }
1315