1 /*      $NetBSD: if_xge.c,v 1.32 2019/05/29 10:07:29 msaitoh Exp $ */
2 
3 /*
4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
5  * All rights reserved.
6  *
7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      SUNET, Swedish University Computer Network.
21  * 4. The name of SUNET may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 /*
38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
39  *
40  * TODO (in no specific order):
41  *	HW VLAN support.
42  *	IPv6 HW cksum.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.32 2019/05/29 10:07:29 msaitoh Exp $");
47 
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/mbuf.h>
52 #include <sys/malloc.h>
53 #include <sys/kernel.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/device.h>
57 
58 #include <net/if.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_ether.h>
62 #include <net/bpf.h>
63 
64 #include <sys/bus.h>
65 #include <sys/intr.h>
66 #include <machine/endian.h>
67 
68 #include <dev/mii/mii.h>
69 #include <dev/mii/miivar.h>
70 
71 #include <dev/pci/pcivar.h>
72 #include <dev/pci/pcireg.h>
73 #include <dev/pci/pcidevs.h>
74 
75 #include <dev/pci/if_xgereg.h>
76 
77 /*
78  * Some tunable constants, tune with care!
79  */
80 #define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
81 #define NRXDESCS	1016	   /* # of receive descriptors (requested) */
82 #define NTXDESCS	8192	   /* Number of transmit descriptors */
83 #define NTXFRAGS	100	   /* Max fragments per packet */
84 #define XGE_EVENT_COUNTERS	   /* Instrumentation */
85 
86 /*
87  * Receive buffer modes; 1, 3 or 5 buffers.
88  */
89 #define RX_MODE_1 1
90 #define RX_MODE_3 3
91 #define RX_MODE_5 5
92 
93 /*
94  * Use clever macros to avoid a bunch of #ifdef's.
95  */
96 #define XCONCAT3(x, y, z) x ## y ## z
97 #define CONCAT3(x, y, z) XCONCAT3(x, y, z)
98 #define NDESC_BUFMODE CONCAT3(NDESC_, RX_MODE, BUFMODE)
99 #define rxd_4k CONCAT3(rxd, RX_MODE, _4k)
100 #define rxdesc ___CONCAT(rxd, RX_MODE)
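/*
 * With the default RX_MODE == RX_MODE_1 (i.e. 1), the token pasting above
 * expands NDESC_BUFMODE to NDESC_1BUFMODE, rxd_4k to rxd1_4k and rxdesc to
 * rxd1, selecting the matching per-mode definitions (assumed to come from
 * if_xgereg.h) without #ifdef blocks scattered through this file.
 */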
101 
102 #define NEXTTX(x)	(((x)+1) % NTXDESCS)
103 #define NRXFRAGS	RX_MODE /* hardware imposed frags */
104 #define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
105 #define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
106 #define RXMAPSZ		(NRXPAGES*PAGE_SIZE)
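/*
 * Sizing sketch (assuming, for illustration, NDESC_1BUFMODE is 127
 * descriptors per 4k page): NRXPAGES = 1016/127 + 1 = 9 pages and
 * NRXREAL = 9*127 = 1143 usable descriptors, with RXMAPSZ covering the
 * nine pages of descriptor memory in one map.
 */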
107 
108 #ifdef XGE_EVENT_COUNTERS
109 #define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
110 #else
111 #define XGE_EVCNT_INCR(ev)	/* nothing */
112 #endif
113 
114 /*
115  * Magic values used to work around a bug where the MAC address can't be
116  * read correctly.  They come from the Linux driver.
117  */
118 static uint64_t fix_mac[] = {
119 	0x0060000000000000ULL, 0x0060600000000000ULL,
120 	0x0040600000000000ULL, 0x0000600000000000ULL,
121 	0x0020600000000000ULL, 0x0060600000000000ULL,
122 	0x0020600000000000ULL, 0x0060600000000000ULL,
123 	0x0020600000000000ULL, 0x0060600000000000ULL,
124 	0x0020600000000000ULL, 0x0060600000000000ULL,
125 	0x0020600000000000ULL, 0x0060600000000000ULL,
126 	0x0020600000000000ULL, 0x0060600000000000ULL,
127 	0x0020600000000000ULL, 0x0060600000000000ULL,
128 	0x0020600000000000ULL, 0x0060600000000000ULL,
129 	0x0020600000000000ULL, 0x0060600000000000ULL,
130 	0x0020600000000000ULL, 0x0060600000000000ULL,
131 	0x0020600000000000ULL, 0x0000600000000000ULL,
132 	0x0040600000000000ULL, 0x0060600000000000ULL,
133 };
134 
135 
136 struct xge_softc {
137 	device_t sc_dev;
138 	struct ethercom sc_ethercom;
139 #define sc_if sc_ethercom.ec_if
140 	bus_dma_tag_t sc_dmat;
141 	bus_space_tag_t sc_st;
142 	bus_space_handle_t sc_sh;
143 	bus_space_tag_t sc_txt;
144 	bus_space_handle_t sc_txh;
145 	void *sc_ih;
146 
147 	struct ifmedia xena_media;
148 	pcireg_t sc_pciregs[16];
149 
150 	/* Transmit structures */
151 	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
152 	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
153 	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
154 	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
155 	int sc_nexttx, sc_lasttx;
156 	bus_dmamap_t sc_txmap;		/* transmit descriptor map */
157 
158 	/* Receive data */
159 	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
160 	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
161 	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
162 	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
163 	int sc_nextrx;			/* next descriptor to check */
164 
165 #ifdef XGE_EVENT_COUNTERS
166 	struct evcnt sc_intr;	/* # of interrupts */
167 	struct evcnt sc_txintr;	/* # of transmit interrupts */
168 	struct evcnt sc_rxintr;	/* # of receive interrupts */
169 	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
170 #endif
171 };
172 
173 static int xge_match(device_t parent, cfdata_t cf, void *aux);
174 static void xge_attach(device_t parent, device_t self, void *aux);
175 static int xge_alloc_txmem(struct xge_softc *);
176 static int xge_alloc_rxmem(struct xge_softc *);
177 static void xge_start(struct ifnet *);
178 static void xge_stop(struct ifnet *, int);
179 static int xge_add_rxbuf(struct xge_softc *, int);
180 static void xge_mcast_filter(struct xge_softc *sc);
181 static int xge_setup_xgxs(struct xge_softc *sc);
182 static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
183 static int xge_init(struct ifnet *ifp);
184 static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
185 static int xge_xgmii_mediachange(struct ifnet *);
186 static int xge_intr(void  *);
187 
188 /*
189  * Helpers to address registers.
190  */
191 #define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
192 #define PIF_RCSR(csr)		pif_rcsr(sc, csr)
193 #define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
194 #define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
195 
196 static inline void
197 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
198 {
199 	uint32_t lval, hval;
200 
201 	lval = val&0xffffffff;
202 	hval = val>>32;
203 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
204 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
205 }
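/*
 * The Xframe CSRs are 64 bits wide; pif_wcsr() writes them as two 32-bit
 * bus_space accesses, low word at the register offset and high word at
 * offset + 4, and pif_rcsr() below reassembles a 64-bit value the same way.
 */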
206 
207 static inline uint64_t
208 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
209 {
210 	uint64_t val, val2;
211 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
212 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
213 	val |= (val2 << 32);
214 	return val;
215 }
216 
217 static inline void
218 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
219 {
220 	uint32_t lval, hval;
221 
222 	lval = val&0xffffffff;
223 	hval = val>>32;
224 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
225 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
226 }
227 
228 
229 static inline void
230 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
231 {
232 	uint32_t lval, hval;
233 
234 	lval = val&0xffffffff;
235 	hval = val>>32;
236 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
237 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
238 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
239 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
240 }
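/*
 * pif_wkey() is pif_wcsr() with RMAC_CFG_KEY written with RMAC_KEY_VALUE
 * before each 32-bit half, presumably unlocking the key-protected RMAC
 * configuration register for that single write.
 */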
241 
242 
243 CFATTACH_DECL_NEW(xge, sizeof(struct xge_softc),
244     xge_match, xge_attach, NULL, NULL);
245 
246 #define XNAME device_xname(sc->sc_dev)
247 
248 #define XGE_RXSYNC(desc, what) \
249 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
250 	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
251 	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
252 #define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
253 	r4_rxd[desc%NDESC_BUFMODE]
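/*
 * Receive descriptors are packed NDESC_BUFMODE to a 4k page (struct rxd_4k),
 * so XGE_RXD() turns a flat descriptor index into a page/slot pair and
 * XGE_RXSYNC() syncs just that one descriptor within the single sc_rxmap.
 */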
254 
255 /*
256  * Non-tunable constants.
257  */
258 #define XGE_MAX_MTU		9600
259 #define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
260 
261 static int
262 xge_match(device_t parent, cfdata_t cf, void *aux)
263 {
264 	struct pci_attach_args *pa = aux;
265 
266 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
267 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
268 		return 1;
269 
270 	return 0;
271 }
272 
273 void
274 xge_attach(device_t parent, device_t self, void *aux)
275 {
276 	struct pci_attach_args *pa = aux;
277 	struct xge_softc *sc;
278 	struct ifnet *ifp;
279 	pcireg_t memtype;
280 	pci_intr_handle_t ih;
281 	const char *intrstr = NULL;
282 	pci_chipset_tag_t pc = pa->pa_pc;
283 	uint8_t enaddr[ETHER_ADDR_LEN];
284 	uint64_t val;
285 	int i;
286 	char intrbuf[PCI_INTRSTR_LEN];
287 
288 	sc = device_private(self);
289 	sc->sc_dev = self;
290 	sc->sc_dmat = pa->pa_dmat;
291 
292 	/* Get BAR0 address */
293 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
294 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
295 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
296 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
297 		return;
298 	}
299 
300 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
301 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
302 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
303 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
304 		return;
305 	}
306 
307 	/* Save PCI config space */
308 	for (i = 0; i < 64; i += 4)
309 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
310 
311 #if BYTE_ORDER == LITTLE_ENDIAN
312 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
313 	val &= ~(TxF_R_SE | RxF_W_SE);
314 	PIF_WCSR(SWAPPER_CTRL, val);
315 	PIF_WCSR(SWAPPER_CTRL, val);
316 #elif BYTE_ORDER == BIG_ENDIAN
317 	/* do nothing */
318 #else
319 #error bad endianness!
320 #endif
321 
322 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
323 		aprint_error("%s: failed configuring endian, %llx != %llx!\n",
324 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
325 		return;
326 	}
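	/*
	 * Reading SWAPPER_MAGIC back from PIF_RD_SWAPPER_Fb confirms that the
	 * swapper setup above took effect and that 64-bit values written from
	 * this host reach the chip in the byte order it expects.
	 */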
327 
328 	/*
329 	 * The MAC address may read back as all FF's, which is not good.
330 	 * Work around it by writing the magic values to GPIO_CONTROL and
331 	 * forcing a chip reset so the serial EEPROM is read in again.
332 	 */
333 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
334 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
335 		PIF_RCSR(GPIO_CONTROL);
336 	}
337 
338 	/*
339 	 * Reset the chip and restore the PCI registers.
340 	 */
341 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
342 	DELAY(500000);
343 	for (i = 0; i < 64; i += 4)
344 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
345 
346 	/*
347 	 * Restore the byte order registers.
348 	 */
349 #if BYTE_ORDER == LITTLE_ENDIAN
350 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
351 	val &= ~(TxF_R_SE | RxF_W_SE);
352 	PIF_WCSR(SWAPPER_CTRL, val);
353 	PIF_WCSR(SWAPPER_CTRL, val);
354 #elif BYTE_ORDER == BIG_ENDIAN
355 	/* do nothing */
356 #else
357 #error bad endianness!
358 #endif
359 
360 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
361 		aprint_error("%s: failed configuring endian2, %llx != %llx!\n",
362 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
363 		return;
364 	}
365 
366 	/*
367 	 * XGXS initialization.
368 	 */
369 	/* 29, reset */
370 	PIF_WCSR(SW_RESET, 0);
371 	DELAY(500000);
372 
373 	/* 30, configure XGXS transceiver */
374 	xge_setup_xgxs(sc);
375 
376 	/* 33, program MAC address (not needed here) */
377 	/* Get ethernet address */
378 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
379 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(0));
380 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
381 		;
382 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
383 	for (i = 0; i < ETHER_ADDR_LEN; i++)
384 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
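	/*
	 * The station address occupies the upper six bytes of the 64-bit
	 * RMAC_ADDR_DATA0_MEM word, most significant byte first, hence the
	 * (56 - 8*i) shift used to peel the bytes off above.
	 */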
385 
386 	/*
387 	 * Get memory for transmit descriptor lists.
388 	 */
389 	if (xge_alloc_txmem(sc)) {
390 		aprint_error("%s: failed allocating txmem.\n", XNAME);
391 		return;
392 	}
393 
394 	/* 9 and 10 - set FIFO number/prio */
395 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
396 	PIF_WCSR(TX_FIFO_P1, 0ULL);
397 	PIF_WCSR(TX_FIFO_P2, 0ULL);
398 	PIF_WCSR(TX_FIFO_P3, 0ULL);
399 
400 	/* 11, XXX set round-robin prio? */
401 
402 	/* 12, enable transmit FIFO */
403 	val = PIF_RCSR(TX_FIFO_P0);
404 	val |= TX_FIFO_ENABLE;
405 	PIF_WCSR(TX_FIFO_P0, val);
406 
407 	/* 13, disable some error checks */
408 	PIF_WCSR(TX_PA_CFG,
409 	    TX_PA_CFG_IFR | TX_PA_CFG_ISO | TX_PA_CFG_ILC | TX_PA_CFG_ILE);
410 
411 	/*
412 	 * Create transmit DMA maps.
413 	 * Make them large for TSO.
414 	 */
415 	for (i = 0; i < NTXDESCS; i++) {
416 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
417 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i])) {
418 			aprint_error("%s: cannot create TX DMA maps\n", XNAME);
419 			return;
420 		}
421 	}
422 
423 	sc->sc_lasttx = NTXDESCS-1;
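	/*
	 * sc_nexttx is the next descriptor handed to the chip and sc_lasttx
	 * the last one reclaimed in xge_intr(); starting sc_lasttx at
	 * NTXDESCS-1 keeps one slot in reserve so the full-ring test in
	 * xge_start() (nexttx == lasttx) can be told apart from an empty ring.
	 */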
424 
425 	/*
426 	 * RxDMA initialization.
427 	 * Only use one out of 8 possible receive queues.
428 	 */
429 	if (xge_alloc_rxmem(sc)) {	/* allocate rx descriptor memory */
430 		aprint_error("%s: failed allocating rxmem\n", XNAME);
431 		return;
432 	}
433 
434 	/* Create receive buffer DMA maps */
435 	for (i = 0; i < NRXREAL; i++) {
436 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
437 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i])) {
438 			aprint_error("%s: cannot create RX DMA maps\n", XNAME);
439 			return;
440 		}
441 	}
442 
443 	/* allocate mbufs to receive descriptors */
444 	for (i = 0; i < NRXREAL; i++)
445 		if (xge_add_rxbuf(sc, i))
446 			panic("out of mbufs too early");
447 
448 	/* 14, setup receive ring priority */
449 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
450 
451 	/* 15, setup receive ring round-robin calendar */
452 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
453 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
454 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
455 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
456 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
457 
458 	/* 16, write receive ring start address */
459 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
460 	/* PRC_RXD0_[1-7] are not used */
461 
462 	/* 17, Setup alarm registers */
463 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
464 
465 	/* 18, init receive ring controller */
466 #if RX_MODE == RX_MODE_1
467 	val = RING_MODE_1;
468 #elif RX_MODE == RX_MODE_3
469 	val = RING_MODE_3;
470 #else /* RX_MODE == RX_MODE_5 */
471 	val = RING_MODE_5;
472 #endif
473 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC | val);
474 	/* leave 1-7 disabled */
475 	/* XXXX snoop configuration? */
476 
477 	/* 19, set chip memory assigned to the queue */
478 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
479 
480 	/* 20, setup RLDRAM parameters */
481 	/* do not touch it for now */
482 
483 	/* 21, setup pause frame thresholds */
484 	/* do not touch the defaults */
485 	/* XXX - must 0xff be written as stated in the manual? */
486 
487 	/* 22, configure RED */
488 	/* we do not want to drop packets, so ignore */
489 
490 	/* 23, initiate RLDRAM */
491 	val = PIF_RCSR(MC_RLDRAM_MRS);
492 	val |= MC_QUEUE_SIZE_ENABLE | MC_RLDRAM_MRS_ENABLE;
493 	PIF_WCSR(MC_RLDRAM_MRS, val);
494 	DELAY(1000);
495 
496 	/*
497 	 * Setup interrupt policies.
498 	 */
499 	/* 40, Transmit interrupts */
500 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
501 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
502 	PIF_WCSR(TTI_DATA2_MEM,
503 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
504 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
505 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
506 		;
507 
508 	/* 41, Receive interrupts */
509 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
510 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
511 	PIF_WCSR(RTI_DATA2_MEM,
512 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
513 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
514 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
515 		;
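	/*
	 * The TTI/RTI values above appear to set up interrupt moderation: a
	 * timer value plus utilization ranges (URNG) and frame counts (UFC)
	 * that decide how long the chip may delay transmit and receive
	 * interrupts.
	 */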
516 
517 	/*
518 	 * Setup media stuff.
519 	 */
520 	sc->sc_ethercom.ec_ifmedia = &sc->xena_media;
521 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
522 	    xge_ifmedia_status);
523 	ifmedia_add(&sc->xena_media, IFM_ETHER | IFM_10G_LR, 0, NULL);
524 	ifmedia_set(&sc->xena_media, IFM_ETHER | IFM_10G_LR);
525 
526 	aprint_normal("%s: Ethernet address %s\n", XNAME,
527 	    ether_sprintf(enaddr));
528 
529 	ifp = &sc->sc_ethercom.ec_if;
530 	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
531 	ifp->if_baudrate = 10000000000LL;
532 	ifp->if_init = xge_init;
533 	ifp->if_stop = xge_stop;
534 	ifp->if_softc = sc;
535 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
536 	ifp->if_ioctl = xge_ioctl;
537 	ifp->if_start = xge_start;
538 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(NTXDESCS - 1, IFQ_MAXLEN));
539 	IFQ_SET_READY(&ifp->if_snd);
540 
541 	/*
542 	 * Offloading capabilities.
543 	 */
544 	sc->sc_ethercom.ec_capabilities |=
545 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
546 	ifp->if_capabilities |=
547 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
548 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
549 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
550 
551 	/*
552 	 * Attach the interface.
553 	 */
554 	if_attach(ifp);
555 	if_deferred_start_init(ifp, NULL);
556 	ether_ifattach(ifp, enaddr);
557 
558 	/*
559 	 * Setup interrupt vector before initializing.
560 	 */
561 	if (pci_intr_map(pa, &ih)) {
562 		aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
563 		return;
564 	}
565 	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
566 	sc->sc_ih = pci_intr_establish_xname(pc, ih, IPL_NET, xge_intr, sc,
567 	    device_xname(self));
568 	if (sc->sc_ih == NULL) {
569 		aprint_error_dev(sc->sc_dev,
570 		    "unable to establish interrupt at %s\n",
571 		    intrstr ? intrstr : "<unknown>");
572 		return;
573 	}
574 	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
575 
576 #ifdef XGE_EVENT_COUNTERS
577 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
578 	    NULL, XNAME, "intr");
579 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
580 	    NULL, XNAME, "txintr");
581 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
582 	    NULL, XNAME, "rxintr");
583 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
584 	    NULL, XNAME, "txqe");
585 #endif
586 }
587 
588 void
589 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
590 {
591 	struct xge_softc *sc = ifp->if_softc;
592 	uint64_t reg;
593 
594 	ifmr->ifm_status = IFM_AVALID;
595 	ifmr->ifm_active = IFM_ETHER | IFM_10G_LR;
596 
597 	reg = PIF_RCSR(ADAPTER_STATUS);
598 	if ((reg & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
599 		ifmr->ifm_status |= IFM_ACTIVE;
600 }
601 
602 int
603 xge_xgmii_mediachange(struct ifnet *ifp)
604 {
605 	return 0;
606 }
607 
608 static void
609 xge_enable(struct xge_softc *sc)
610 {
611 	uint64_t val;
612 
613 	/* 2, enable adapter */
614 	val = PIF_RCSR(ADAPTER_CONTROL);
615 	val |= ADAPTER_EN;
616 	PIF_WCSR(ADAPTER_CONTROL, val);
617 
618 	/* 3, light the card enable led */
619 	val = PIF_RCSR(ADAPTER_CONTROL);
620 	val |= LED_ON;
621 	PIF_WCSR(ADAPTER_CONTROL, val);
622 	printf("%s: link up\n", XNAME);
623 
624 }
625 
626 int
627 xge_init(struct ifnet *ifp)
628 {
629 	struct xge_softc *sc = ifp->if_softc;
630 	uint64_t val;
631 
632 	if (ifp->if_flags & IFF_RUNNING)
633 		return 0;
634 
635 	/* 31+32, setup MAC config */
636 	PIF_WKEY(MAC_CFG, TMAC_EN | RMAC_EN | TMAC_APPEND_PAD |
637 	    RMAC_STRIP_FCS | RMAC_BCAST_EN | RMAC_DISCARD_PFRM | RMAC_PROM_EN);
638 
639 	DELAY(1000);
640 
641 	/* 54, ensure that the adapter is 'quiescent' */
642 	val = PIF_RCSR(ADAPTER_STATUS);
643 	if ((val & QUIESCENT) != QUIESCENT) {
644 		char buf[200];
645 		printf("%s: adapter not quiescent, aborting\n", XNAME);
646 		val = (val & QUIESCENT) ^ QUIESCENT;
647 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
648 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
649 		return 1;
650 	}
651 
652 	/* 56, enable the transmit laser */
653 	val = PIF_RCSR(ADAPTER_CONTROL);
654 	val |= EOI_TX_ON;
655 	PIF_WCSR(ADAPTER_CONTROL, val);
656 
657 	xge_enable(sc);
658 	/*
659 	 * Enable all interrupts
660 	 */
661 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
662 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
663 	PIF_WCSR(GENERAL_INT_MASK, 0);
664 	PIF_WCSR(TXPIC_INT_MASK, 0);
665 	PIF_WCSR(RXPIC_INT_MASK, 0);
666 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
667 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
668 
669 
670 	/* Done... */
671 	ifp->if_flags |= IFF_RUNNING;
672 	ifp->if_flags &= ~IFF_OACTIVE;
673 
674 	return 0;
675 }
676 
677 static void
678 xge_stop(struct ifnet *ifp, int disable)
679 {
680 	struct xge_softc *sc = ifp->if_softc;
681 	uint64_t val;
682 
683 	val = PIF_RCSR(ADAPTER_CONTROL);
684 	val &= ~ADAPTER_EN;
685 	PIF_WCSR(ADAPTER_CONTROL, val);
686 
687 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
688 		;
689 }
690 
691 int
692 xge_intr(void *pv)
693 {
694 	struct xge_softc *sc = pv;
695 	struct txd *txd;
696 	struct ifnet *ifp = &sc->sc_if;
697 	bus_dmamap_t dmp;
698 	uint64_t val;
699 	int i, lasttx, plen;
700 
701 	val = PIF_RCSR(GENERAL_INT_STATUS);
702 	if (val == 0)
703 		return 0; /* no interrupt here */
704 
705 	XGE_EVCNT_INCR(&sc->sc_intr);
706 
707 	PIF_WCSR(GENERAL_INT_STATUS, val);
708 
709 	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
710 		/* Wait for quiescence */
711 		printf("%s: link down\n", XNAME);
712 		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
713 			;
714 		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);
715 
716 		val = PIF_RCSR(ADAPTER_STATUS);
717 		if ((val & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
718 			xge_enable(sc); /* Only if link restored */
719 	}
720 
721 	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
722 		XGE_EVCNT_INCR(&sc->sc_txintr);
723 		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
724 	}
725 	/*
726 	 * Collect sent packets.
727 	 */
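	/*
	 * Walk forward from the last reclaimed descriptor; a set TXD_CTL1_OWN
	 * bit means the chip still owns that descriptor, so the packets from
	 * there on are still in flight and the walk stops.
	 */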
728 	lasttx = sc->sc_lasttx;
729 	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
730 		txd = sc->sc_txd[i];
731 		dmp = sc->sc_txm[i];
732 
733 		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
734 		    dmp->dm_mapsize,
735 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
736 
737 		if (txd->txd_control1 & TXD_CTL1_OWN) {
738 			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
739 			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
740 			break;
741 		}
742 		bus_dmamap_unload(sc->sc_dmat, dmp);
743 		m_freem(sc->sc_txb[i]);
744 		ifp->if_opackets++;
745 		sc->sc_lasttx = i;
746 	}
747 	if (i == sc->sc_nexttx) {
748 		XGE_EVCNT_INCR(&sc->sc_txqe);
749 	}
750 
751 	if (sc->sc_lasttx != lasttx)
752 		ifp->if_flags &= ~IFF_OACTIVE;
753 
754 	/* Try to get more packets on the wire */
755 	if_schedule_deferred_start(ifp);
756 
757 	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
758 		XGE_EVCNT_INCR(&sc->sc_rxintr);
759 		PIF_WCSR(RX_TRAFFIC_INT, val); /* Clear interrupt bits */
760 	}
761 
762 	for (;;) {
763 		struct rxdesc *rxd;
764 		struct mbuf *m;
765 
766 		XGE_RXSYNC(sc->sc_nextrx,
767 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
768 
769 		rxd = XGE_RXD(sc->sc_nextrx);
770 		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
771 			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
772 			break;
773 		}
774 
775 		/* Got a packet */
776 		m = sc->sc_rxb[sc->sc_nextrx];
777 #if RX_MODE == RX_MODE_1
778 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
779 #elif RX_MODE == RX_MODE_3
780 #error Fix rxmodes in xge_intr
781 #elif RX_MODE == RX_MODE_5
782 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
783 		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
784 		plen += m->m_next->m_next->m_len =
785 		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
786 		plen += m->m_next->m_next->m_next->m_len =
787 		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
788 		plen += m->m_next->m_next->m_next->m_next->m_len =
789 		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
790 #endif
791 		m_set_rcvif(m, ifp);
792 		m->m_pkthdr.len = plen;
793 
794 		val = rxd->rxd_control1;
795 
796 		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
797 			/* Failed, recycle this mbuf */
798 #if RX_MODE == RX_MODE_1
799 			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
800 			rxd->rxd_control1 = RXD_CTL1_OWN;
801 #elif RX_MODE == RX_MODE_3
802 #elif RX_MODE == RX_MODE_5
803 #endif
804 			XGE_RXSYNC(sc->sc_nextrx,
805 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
806 			ifp->if_ierrors++;
807 			break;
808 		}
809 
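		/*
		 * Receive checksum offload: a value of 0xffff in the L3/L4
		 * checksum fields is taken to mean the hardware verified the
		 * sum; anything else gets the corresponding _BAD flag set.
		 */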
810 		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
811 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
812 			if (RXD_CTL1_L3CSUM(val) != 0xffff)
813 				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
814 		}
815 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
816 			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4 | M_CSUM_TCPv6;
817 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
818 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
819 		}
820 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
821 			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4 | M_CSUM_UDPv6;
822 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
823 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
824 		}
825 
826 		if_percpuq_enqueue(ifp->if_percpuq, m);
827 
828 		if (++sc->sc_nextrx == NRXREAL)
829 			sc->sc_nextrx = 0;
830 
831 	}
832 
833 	return 0;
834 }
835 
836 int
837 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
838 {
839 	struct xge_softc *sc = ifp->if_softc;
840 	struct ifreq *ifr = (struct ifreq *) data;
841 	int s, error = 0;
842 
843 	s = splnet();
844 
845 	switch (cmd) {
846 	case SIOCSIFMTU:
847 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
848 			error = EINVAL;
849 		else if ((error = ifioctl_common(ifp, cmd, data))
850 		    == ENETRESET) {
851 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
852 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
853 			error = 0;
854 		}
855 		break;
856 
857 	default:
858 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
859 			break;
860 
861 		error = 0;
862 
863 		if (cmd != SIOCADDMULTI && cmd != SIOCDELMULTI)
864 			;
865 		else if (ifp->if_flags & IFF_RUNNING) {
866 			/* Change multicast list */
867 			xge_mcast_filter(sc);
868 		}
869 		break;
870 	}
871 
872 	splx(s);
873 	return error;
874 }
875 
876 void
877 xge_mcast_filter(struct xge_softc *sc)
878 {
879 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
880 	struct ethercom *ec = &sc->sc_ethercom;
881 	struct ether_multi *enm;
882 	struct ether_multistep step;
883 	int i, numaddr = 1; /* first slot used for card unicast address */
884 	uint64_t val;
885 
886 	ETHER_LOCK(ec);
887 	ETHER_FIRST_MULTI(step, ec, enm);
888 	while (enm != NULL) {
889 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
890 			/* Skip ranges */
891 			ETHER_UNLOCK(ec);
892 			goto allmulti;
893 		}
894 		if (numaddr == MAX_MCAST_ADDR) {
895 			ETHER_UNLOCK(ec);
896 			goto allmulti;
897 		}
898 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
899 			val <<= 8;
900 			val |= enm->enm_addrlo[i];
901 		}
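		/*
		 * The six address bytes were accumulated MSB-first into val;
		 * shifting left by 16 puts the address in the top 48 bits of
		 * the 64-bit RMAC_ADDR_DATA0_MEM word, the same layout the
		 * broadcast fill-in below uses.
		 */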
902 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
903 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
904 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
905 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(numaddr));
906 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
907 			;
908 		numaddr++;
909 		ETHER_NEXT_MULTI(step, enm);
910 	}
911 	ETHER_UNLOCK(ec);
912 	/* set the remaining entries to the broadcast address */
913 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
914 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
915 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
916 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
917 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(i));
918 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
919 			;
920 	}
921 	ifp->if_flags &= ~IFF_ALLMULTI;
922 	return;
923 
924 allmulti:
925 	/* Just receive everything with the multicast bit set */
926 	ifp->if_flags |= IFF_ALLMULTI;
927 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
928 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
929 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
930 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(1));
931 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
932 		;
933 }
934 
935 void
936 xge_start(struct ifnet *ifp)
937 {
938 	struct xge_softc *sc = ifp->if_softc;
939 	struct txd *txd = NULL; /* XXX - gcc */
940 	bus_dmamap_t dmp;
941 	struct	mbuf *m;
942 	uint64_t par, lcr;
943 	int nexttx = 0, ntxd, error, i;
944 
945 	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
946 		return;
947 
948 	par = lcr = 0;
949 	for (;;) {
950 		IFQ_POLL(&ifp->if_snd, m);
951 		if (m == NULL)
952 			break;	/* out of packets */
953 
954 		if (sc->sc_nexttx == sc->sc_lasttx)
955 			break;	/* No more space */
956 
957 		nexttx = sc->sc_nexttx;
958 		dmp = sc->sc_txm[nexttx];
959 
960 		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
961 		    BUS_DMA_WRITE | BUS_DMA_NOWAIT)) != 0) {
962 			printf("%s: bus_dmamap_load_mbuf error %d\n",
963 			    XNAME, error);
964 			break;
965 		}
966 		IFQ_DEQUEUE(&ifp->if_snd, m);
967 
968 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
969 		    BUS_DMASYNC_PREWRITE);
970 
971 		txd = sc->sc_txd[nexttx];
972 		sc->sc_txb[nexttx] = m;
973 		for (i = 0; i < dmp->dm_nsegs; i++) {
974 			if (dmp->dm_segs[i].ds_len == 0)
975 				continue;
976 			txd->txd_control1 = dmp->dm_segs[i].ds_len;
977 			txd->txd_control2 = 0;
978 			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
979 			txd++;
980 		}
981 		ntxd = txd - sc->sc_txd[nexttx] - 1;
982 		txd = sc->sc_txd[nexttx];
983 		txd->txd_control1 |= TXD_CTL1_OWN | TXD_CTL1_GCF;
984 		txd->txd_control2 = TXD_CTL2_UTIL;
985 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
986 			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
987 			txd->txd_control1 |= TXD_CTL1_LSO;
988 		}
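		/*
		 * For TSO the stack's segment size is passed to the chip in
		 * the descriptor via TXD_CTL1_MSS together with the LSO flag,
		 * leaving the actual segmentation to the hardware.
		 */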
989 
990 		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
991 			txd->txd_control2 |= TXD_CTL2_CIPv4;
992 		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
993 			txd->txd_control2 |= TXD_CTL2_CTCP;
994 		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
995 			txd->txd_control2 |= TXD_CTL2_CUDP;
996 		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;
997 
998 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
999 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1000 
1001 		par = sc->sc_txdp[nexttx];
1002 		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
1003 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
1004 			lcr |= TXDL_SFF;
1005 		TXP_WCSR(TXDL_PAR, par);
1006 		TXP_WCSR(TXDL_LCR, lcr);
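		/*
		 * Writing the descriptor list's bus address to TXDL_PAR and
		 * then the list control word (descriptor count and first/last
		 * flags) to TXDL_LCR is what hands the packet to the transmit
		 * FIFO.
		 */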
1007 
1008 		bpf_mtap(ifp, m, BPF_D_OUT);
1009 
1010 		sc->sc_nexttx = NEXTTX(nexttx);
1011 	}
1012 }
1013 
1014 /*
1015  * Allocate DMA memory for transmit descriptor fragments.
1016  * Only one map is used for all descriptors.
1017  */
1018 int
1019 xge_alloc_txmem(struct xge_softc *sc)
1020 {
1021 	struct txd *txp;
1022 	bus_dma_segment_t seg;
1023 	bus_addr_t txdp;
1024 	void *kva;
1025 	int i, rseg, state;
1026 
1027 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
1028 	state = 0;
1029 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
1030 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1031 		goto err;
1032 	state++;
1033 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
1034 	    BUS_DMA_NOWAIT))
1035 		goto err;
1036 
1037 	state++;
1038 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
1039 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
1040 		goto err;
1041 	state++;
1042 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
1043 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
1044 		goto err;
1045 
1046 	/* setup transmit array pointers */
1047 	txp = (struct txd *)kva;
1048 	txdp = seg.ds_addr;
1049 	for (txp = (struct txd *)kva, i = 0; i < NTXDESCS; i++) {
1050 		sc->sc_txd[i] = txp;
1051 		sc->sc_txdp[i] = txdp;
1052 		txp += NTXFRAGS;
1053 		txdp += (NTXFRAGS * sizeof(struct txd));
1054 	}
1055 
1056 	return 0;
1057 
1058 err:
1059 	if (state > 2)
1060 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
1061 	if (state > 1)
1062 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
1063 	if (state > 0)
1064 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1065 	return ENOBUFS;
1066 }
1067 
1068 /*
1069  * Allocate DMA memory for the receive descriptors;
1070  * only one map is used for all descriptors.
1071  * Link the receive descriptor pages together.
1072  */
1073 int
1074 xge_alloc_rxmem(struct xge_softc *sc)
1075 {
1076 	struct rxd_4k *rxpp;
1077 	bus_dma_segment_t seg;
1078 	void *kva;
1079 	int i, rseg, state;
1080 
1081 	/* sanity check */
1082 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
1083 		printf("bad compiler struct alignment, %d != %d\n",
1084 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
1085 		return EINVAL;
1086 	}
1087 
1088 	state = 0;
1089 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
1090 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
1091 		goto err;
1092 	state++;
1093 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
1094 	    BUS_DMA_NOWAIT))
1095 		goto err;
1096 
1097 	state++;
1098 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
1099 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
1100 		goto err;
1101 	state++;
1102 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
1103 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
1104 		goto err;
1105 
1106 	/* setup receive page link pointers */
1107 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
1108 		sc->sc_rxd_4k[i] = rxpp;
1109 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
1110 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
1111 	}
1112 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
1113 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
1114 
1115 	return 0;
1116 
1117 err:
1118 	if (state > 2)
1119 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
1120 	if (state > 1)
1121 		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
1122 	if (state > 0)
1123 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
1124 	return ENOBUFS;
1125 }
1126 
1127 
1128 /*
1129  * Add a new mbuf chain to descriptor id.
1130  */
1131 int
1132 xge_add_rxbuf(struct xge_softc *sc, int id)
1133 {
1134 	struct rxdesc *rxd;
1135 	struct mbuf *m[5];
1136 	int page, desc, error;
1137 #if RX_MODE == RX_MODE_5
1138 	int i;
1139 #endif
1140 
1141 	page = id/NDESC_BUFMODE;
1142 	desc = id%NDESC_BUFMODE;
1143 
1144 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
1145 
1146 	/*
1147 	 * Allocate mbufs.
1148 	 * In 5-buffer mode five mbufs and two clusters are used:
1149 	 * the hardware puts the (ethernet, ip, tcp/udp) headers in
1150 	 * their own buffers and the clusters are used only for data.
	 * In the default 1-buffer mode a single cluster mbuf holds the
	 * whole frame.
1151 	 */
1152 #if RX_MODE == RX_MODE_1
1153 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1154 	if (m[0] == NULL)
1155 		return ENOBUFS;
1156 	MCLGET(m[0], M_DONTWAIT);
1157 	if ((m[0]->m_flags & M_EXT) == 0) {
1158 		m_freem(m[0]);
1159 		return ENOBUFS;
1160 	}
1161 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
1162 #elif RX_MODE == RX_MODE_3
1163 #error missing rxmode 3.
1164 #elif RX_MODE == RX_MODE_5
1165 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
1166 	for (i = 1; i < 5; i++) {
1167 		MGET(m[i], M_DONTWAIT, MT_DATA);
1168 	}
1169 	if (m[3])
1170 		MCLGET(m[3], M_DONTWAIT);
1171 	if (m[4])
1172 		MCLGET(m[4], M_DONTWAIT);
1173 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
1174 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
1175 		/* Out of something */
1176 		for (i = 0; i < 5; i++)
1177 			if (m[i] != NULL)
1178 				m_free(m[i]);
1179 		return ENOBUFS;
1180 	}
1181 	/* Link'em together */
1182 	m[0]->m_next = m[1];
1183 	m[1]->m_next = m[2];
1184 	m[2]->m_next = m[3];
1185 	m[3]->m_next = m[4];
1186 #else
1187 #error bad mode RX_MODE
1188 #endif
1189 
1190 	if (sc->sc_rxb[id])
1191 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
1192 	sc->sc_rxb[id] = m[0];
1193 
1194 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
1195 	    BUS_DMA_READ | BUS_DMA_NOWAIT);
1196 	if (error)
1197 		return error;
1198 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
1199 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
1200 
1201 #if RX_MODE == RX_MODE_1
1202 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
1203 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1204 	rxd->rxd_control1 = RXD_CTL1_OWN;
1205 #elif RX_MODE == RX_MODE_3
1206 #elif RX_MODE == RX_MODE_5
1207 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
1208 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
1209 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
1210 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
1211 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
1212 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
1213 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
1214 	rxd->rxd_control1 = RXD_CTL1_OWN;
1215 #endif
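	/*
	 * Setting RXD_CTL1_OWN hands the descriptor and its freshly loaded
	 * buffer(s) back to the chip; the XGE_RXSYNC() below pushes the
	 * updated descriptor out to the memory the chip reads it from.
	 */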
1216 
1217 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1218 	return 0;
1219 }
1220 
1221 /*
1222  * These magic values come from the FreeBSD driver.
1223  */
1224 int
1225 xge_setup_xgxs(struct xge_softc *sc)
1226 {
1227 	/* The magic numbers are described in the user's guide */
1228 
1229 	/* Writing to MDIO 0x8000 (Global Config 0) */
1230 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1231 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
1232 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
1233 
1234 	/* Writing to MDIO 0x8000 (Global Config 1) */
1235 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1236 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1237 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
1238 
1239 	/* Reset the Gigablaze */
1240 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1241 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
1242 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
1243 
1244 	/* read the pole settings */
1245 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
1246 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
1247 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
1248 
1249 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
1250 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
1251 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
1252 
1253 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
1254 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
1255 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
1256 
1257 	/* Workaround for TX Lane XAUI initialization error.
1258 	   Read Xpak PHY register 24 for XAUI lane status */
1259 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
1260 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1261 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
1262 
1263 	/*
1264 	 * Reading the MDIO control with value 0x1804001c0F001c
1265 	 * means the TxLanes were already in sync.
1266 	 * Reading the MDIO control with value 0x1804000c0x001c
1267 	 * means some TxLanes are not in sync, where x is a 4-bit
1268 	 * value representing each lane.
1269 	 */
1270 #if 0
1271 	val = PIF_RCSR(MDIO_CONTROL);
1272 	if (val != 0x1804001c0F001cULL) {
1273 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
1274 		    XNAME, val, 0x1804001c0F001cULL);
1275 		return 1;
1276 	}
1277 #endif
1278 
1279 	/* Set and remove the DTE XS INTLoopBackN */
1280 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
1281 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
1282 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
1283 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
1284 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
1285 
1286 #if 0
1287 	/* Reading the DTX control register should return 0x5152040001c */
1288 	val = PIF_RCSR(DTX_CONTROL);
1289 	if (val != 0x5152040001cULL) {
1290 		printf("%s: DTX_CONTROL: %llx != %llx\n",
1291 		    XNAME, val, 0x5152040001cULL);
1292 		return 1;
1293 	}
1294 #endif
1295 
1296 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
1297 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
1298 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
1299 
1300 #if 0
1301 	/* Reading the MDIO control should return 0x1804001c0f001c */
1302 	val = PIF_RCSR(MDIO_CONTROL);
1303 	if (val != 0x1804001c0f001cULL) {
1304 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
1305 		    XNAME, val, 0x1804001c0f001cULL);
1306 		return 1;
1307 	}
1308 #endif
1309 	return 0;
1310 }
1311