/*	$NetBSD: rtl8169.c,v 1.166 2020/03/13 04:08:07 thorpej Exp $	*/

/*
 * Copyright (c) 1997, 1998-2003
 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rtl8169.c,v 1.166 2020/03/13 04:08:07 thorpej Exp $");
/* $FreeBSD: /repoman/r/ncvs/src/sys/dev/re/if_re.c,v 1.20 2004/04/11 20:34:08 ru Exp $ */

/*
 * RealTek 8139C+/8169/8169S/8168/8110S PCI NIC driver
 *
 * Written by Bill Paul <wpaul@windriver.com>
 * Senior Networking Software Engineer
 * Wind River Systems
 */

/*
 * This driver is designed to support RealTek's next generation of
 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
 * six devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
 * the RTL8110S, the RTL8168 and the RTL8111.
 *
 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
 * with the older 8139 family; however, it also supports a special
 * C+ mode of operation that provides several new performance enhancing
 * features. These include:
 *
 *	o Descriptor based DMA mechanism. Each descriptor represents
 *	  a single packet fragment. Data buffers may be aligned on
 *	  any byte boundary.
 *
 *	o 64-bit DMA
 *
 *	o TCP/IP checksum offload for both RX and TX
 *
 *	o High and normal priority transmit DMA rings
 *
 *	o VLAN tag insertion and extraction
 *
 *	o TCP large send (segmentation offload)
 *
 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
 * programming API is fairly straightforward. The RX filtering, EEPROM
 * access and PHY access are the same as on the older 8139 series
 * chips.
 *
 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
 * same programming API and feature set as the 8139C+ with the following
 * differences and additions:
 *
 *	o 1000Mbps mode
 *
 *	o Jumbo frames
 *
 *	o GMII and TBI ports/registers for interfacing with copper
 *	  or fiber PHYs
 *
 *	o RX and TX DMA rings can have up to 1024 descriptors
 *	  (the 8139C+ allows a maximum of 64)
 *
 *	o Slight differences in register layout from the 8139C+
 *
 * The TX start and timer interrupt registers are at different locations
 * on the 8169 than they are on the 8139C+. Also, the status word in the
 * RX descriptor has a slightly different bit layout. The 8169 does not
 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
 * copper gigE PHY.
 *
 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
 * (the 'S' stands for 'single-chip'). These devices have the same
 * programming API as the older 8169, but also have some vendor-specific
 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
 *
 * This driver takes advantage of the RX and TX checksum offload and
 * VLAN tag insertion/extraction features. It also implements TX
 * interrupt moderation using the timer interrupt registers, which
 * significantly reduces TX interrupt load. There is also support
 * for jumbo frames, however the 8169/8169S/8110S cannot transmit
 * jumbo frames larger than 7.5K, so the max MTU possible with this
 * driver is 7500 bytes.
 */


#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/device.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_ether.h>
#include <net/if_media.h>
#include <net/if_vlanvar.h>

#include <netinet/in_systm.h>	/* XXX for IP_MAXPACKET */
#include <netinet/in.h>		/* XXX for IP_MAXPACKET */
#include <netinet/ip.h>		/* XXX for IP_MAXPACKET */

#include <net/bpf.h>
#include <sys/rndsource.h>

#include <sys/bus.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/ic/rtl81x9reg.h>
#include <dev/ic/rtl81x9var.h>

#include <dev/ic/rtl8169var.h>

static inline void re_set_bufaddr(struct re_desc *, bus_addr_t);

static int re_newbuf(struct rtk_softc *, int, struct mbuf *);
static int re_rx_list_init(struct rtk_softc *);
static int re_tx_list_init(struct rtk_softc *);
static void re_rxeof(struct rtk_softc *);
static void re_txeof(struct rtk_softc *);
static void re_tick(void *);
static void re_start(struct ifnet *);
static int re_ioctl(struct ifnet *, u_long, void *);
static int re_init(struct ifnet *);
static void re_stop(struct ifnet *, int);
static void re_watchdog(struct ifnet *);

static int re_enable(struct rtk_softc *);
static void re_disable(struct rtk_softc *);

static int re_gmii_readreg(device_t, int, int, uint16_t *);
static int re_gmii_writereg(device_t, int, int, uint16_t);

static int re_miibus_readreg(device_t, int, int, uint16_t *);
static int re_miibus_writereg(device_t, int, int, uint16_t);
static void re_miibus_statchg(struct ifnet *);

static void re_reset(struct rtk_softc *);

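/*
 * re_set_bufaddr:
 *	Split a 64-bit bus address into the low/high words of a descriptor.
 *	The descriptor fields are little-endian regardless of host order.
 */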
static inline void
re_set_bufaddr(struct re_desc *d, bus_addr_t addr)
{

	d->re_bufaddr_lo = htole32(RE_ADDR_LO(addr));
	d->re_bufaddr_hi = htole32(RE_ADDR_HI(addr));
}

static int
re_gmii_readreg(device_t dev, int phy, int reg, uint16_t *val)
{
	struct rtk_softc *sc = device_private(dev);
	uint32_t data;
	int i;

	if (phy != 7)
		return -1;

	/* Let the rgephy driver read the GMEDIASTAT register */

	if (reg == RTK_GMEDIASTAT) {
		*val = CSR_READ_1(sc, RTK_GMEDIASTAT);
		return 0;
	}

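	/*
	 * Start a read cycle: write the register address with the
	 * flag (RTK_PHYAR_BUSY) bit clear, then poll until the chip
	 * sets it to indicate the data field is valid. (Note that
	 * the polarity is reversed for writes below.)
	 */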
	CSR_WRITE_4(sc, RTK_PHYAR, reg << 16);
	DELAY(1000);

	for (i = 0; i < RTK_TIMEOUT; i++) {
		data = CSR_READ_4(sc, RTK_PHYAR);
		if (data & RTK_PHYAR_BUSY)
			break;
		DELAY(100);
	}

	if (i == RTK_TIMEOUT) {
		printf("%s: PHY read failed\n", device_xname(sc->sc_dev));
		return ETIMEDOUT;
	}

	*val = data & RTK_PHYAR_PHYDATA;
	return 0;
}

static int
re_gmii_writereg(device_t dev, int phy, int reg, uint16_t val)
{
	struct rtk_softc *sc = device_private(dev);
	uint32_t data;
	int i;

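	/*
	 * Writes are the mirror image of reads: the cycle is started
	 * with RTK_PHYAR_BUSY set, and the chip clears it when the
	 * write has completed.
	 */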
	CSR_WRITE_4(sc, RTK_PHYAR, (reg << 16) |
	    (val & RTK_PHYAR_PHYDATA) | RTK_PHYAR_BUSY);
	DELAY(1000);

	for (i = 0; i < RTK_TIMEOUT; i++) {
		data = CSR_READ_4(sc, RTK_PHYAR);
		if (!(data & RTK_PHYAR_BUSY))
			break;
		DELAY(100);
	}

	if (i == RTK_TIMEOUT) {
		printf("%s: PHY write reg %x <- %hx failed\n",
		    device_xname(sc->sc_dev), reg, val);
		return ETIMEDOUT;
	}

	return 0;
}

static int
re_miibus_readreg(device_t dev, int phy, int reg, uint16_t *val)
{
	struct rtk_softc *sc = device_private(dev);
	uint16_t re8139_reg = 0;
	int s, rv = 0;

	s = splnet();

	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
		rv = re_gmii_readreg(dev, phy, reg, val);
		splx(s);
		return rv;
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy) {
		splx(s);
		return -1;
	}
	switch (reg) {
	case MII_BMCR:
		re8139_reg = RTK_BMCR;
		break;
	case MII_BMSR:
		re8139_reg = RTK_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RTK_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RTK_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RTK_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		*val = 0;
		splx(s);
		return 0;
	/*
	 * Allow the rlphy driver to read the media status
	 * register. If we have a link partner which does not
	 * support NWAY, this is the register which will tell
	 * us the results of parallel detection.
	 */
	case RTK_MEDIASTAT:
		*val = CSR_READ_1(sc, RTK_MEDIASTAT);
		splx(s);
		return 0;
	default:
		printf("%s: bad phy register\n", device_xname(sc->sc_dev));
		splx(s);
		return -1;
	}
	*val = CSR_READ_2(sc, re8139_reg);
	if ((sc->sc_quirk & RTKQ_8139CPLUS) != 0 && re8139_reg == RTK_BMCR) {
		/* 8139C+ has different bit layout. */
		*val &= ~(BMCR_LOOP | BMCR_ISO);
	}
	splx(s);
	return 0;
}

static int
re_miibus_writereg(device_t dev, int phy, int reg, uint16_t val)
{
	struct rtk_softc *sc = device_private(dev);
	uint16_t re8139_reg = 0;
	int s, rv;

	s = splnet();

	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
		rv = re_gmii_writereg(dev, phy, reg, val);
		splx(s);
		return rv;
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy) {
		splx(s);
		return -1;
	}
	switch (reg) {
	case MII_BMCR:
		re8139_reg = RTK_BMCR;
		if ((sc->sc_quirk & RTKQ_8139CPLUS) != 0) {
			/* 8139C+ has different bit layout. */
			val &= ~(BMCR_LOOP | BMCR_ISO);
		}
		break;
	case MII_BMSR:
		re8139_reg = RTK_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RTK_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RTK_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RTK_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		splx(s);
		return 0;
	default:
		printf("%s: bad phy register\n", device_xname(sc->sc_dev));
		splx(s);
		return -1;
	}
	CSR_WRITE_2(sc, re8139_reg, val);
	splx(s);
	return 0;
}

static void
re_miibus_statchg(struct ifnet *ifp)
{

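	/* Nothing to do. */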
	return;
}

static void
re_reset(struct rtk_softc *sc)
{
	int i;

	CSR_WRITE_1(sc, RTK_COMMAND, RTK_CMD_RESET);

	for (i = 0; i < RTK_TIMEOUT; i++) {
		DELAY(10);
		if ((CSR_READ_1(sc, RTK_COMMAND) & RTK_CMD_RESET) == 0)
			break;
	}
	if (i == RTK_TIMEOUT)
		printf("%s: reset never completed!\n",
		    device_xname(sc->sc_dev));

	/*
	 * NB: Realtek-supplied FreeBSD driver does this only for MACFG_3,
	 *     but also says "Rtl8169s sigle chip detected".
	 */
	if ((sc->sc_quirk & RTKQ_MACLDPS) != 0)
		CSR_WRITE_1(sc, RTK_LDPS, 1);
}

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus, however for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of taking a soldering
 * iron to the board), however we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

int
re_diag(struct rtk_softc *sc)
{
	struct ifnet *ifp = &sc->ethercom.ec_if;
	struct mbuf *m0;
	struct ether_header *eh;
	struct re_rxsoft *rxs;
	struct re_desc *cur_rx;
	bus_dmamap_t dmamap;
	uint16_t status;
	uint32_t rxstat;
	int total_len, i, s, error = 0;
	static const uint8_t dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	static const uint8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */

	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return ENOBUFS;

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->re_testmode = 1;
	re_init(ifp);
	re_stop(ifp, 0);
	DELAY(100000);
	re_init(ifp);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	memcpy(eh->ether_dhost, &dst, ETHER_ADDR_LEN);
	memcpy(eh->ether_shost, &src, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 */

	CSR_WRITE_2(sc, RTK_ISR, 0xFFFF);
	s = splnet();
	IF_ENQUEUE(&ifp->if_snd, m0);
	re_start(ifp);
	splx(s);
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RTK_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RTK_ISR);
		if ((status & (RTK_ISR_TIMEOUT_EXPIRED | RTK_ISR_RX_OK)) ==
		    (RTK_ISR_TIMEOUT_EXPIRED | RTK_ISR_RX_OK))
			break;
		DELAY(10);
	}
	if (i == RTK_TIMEOUT) {
		aprint_error_dev(sc->sc_dev,
		    "diagnostic failed, failed to receive packet "
		    "in loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 */

	rxs = &sc->re_ldata.re_rxsoft[0];
	dmamap = rxs->rxs_dmamap;
	bus_dmamap_sync(sc->sc_dmat, dmamap, 0, dmamap->dm_mapsize,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->sc_dmat, dmamap);

	m0 = rxs->rxs_mbuf;
	rxs->rxs_mbuf = NULL;
	eh = mtod(m0, struct ether_header *);

	RE_RXDESCSYNC(sc, 0, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
	cur_rx = &sc->re_ldata.re_rx_list[0];
	rxstat = le32toh(cur_rx->re_cmdstat);
	total_len = rxstat & sc->re_rxlenmask;

	if (total_len != ETHER_MIN_LEN) {
		aprint_error_dev(sc->sc_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (memcmp(&eh->ether_dhost, &dst, ETHER_ADDR_LEN) ||
	    memcmp(&eh->ether_shost, &src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		aprint_error_dev(sc->sc_dev, "WARNING, DMA FAILURE!\n"
		    "expected TX data: %s/%s/0x%x\n"
		    "received RX data: %s/%s/0x%x\n"
		    "You may have a defective 32-bit NIC plugged "
		    "into a 64-bit PCI slot.\n"
		    "Please re-install the NIC in a 32-bit slot "
		    "for proper operation.\n"
		    "Read the re(4) man page for more details.\n",
		    ether_sprintf(dst), ether_sprintf(src), ETHERTYPE_IP,
		    ether_sprintf(eh->ether_dhost),
		    ether_sprintf(eh->ether_shost), ntohs(eh->ether_type));
		error = EIO;
	}

 done:
	/* Turn interface off, release resources */

	sc->re_testmode = 0;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(ifp, 0);
	if (m0 != NULL)
		m_freem(m0);

	return error;
}


/*
 * Attach the interface. Allocate softc structures, do ifmedia
 * setup and ethernet/BPF attach.
 */
void
re_attach(struct rtk_softc *sc)
{
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp;
	struct mii_data *mii = &sc->mii;
	int error = 0, i;

	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
		uint32_t hwrev;

		/* Revision of 8169/8169S/8110s in bits 30..26, 23 */
		hwrev = CSR_READ_4(sc, RTK_TXCFG) & RTK_TXCFG_HWREV;
		switch (hwrev) {
		case RTK_HWREV_8169:
			sc->sc_quirk |= RTKQ_8169NONS;
			break;
		case RTK_HWREV_8169S:
		case RTK_HWREV_8110S:
		case RTK_HWREV_8169_8110SB:
		case RTK_HWREV_8169_8110SBL:
		case RTK_HWREV_8169_8110SC:
			sc->sc_quirk |= RTKQ_MACLDPS;
			break;
		case RTK_HWREV_8168_SPIN1:
		case RTK_HWREV_8168_SPIN2:
		case RTK_HWREV_8168_SPIN3:
			sc->sc_quirk |= RTKQ_MACSTAT;
			break;
		case RTK_HWREV_8168C:
		case RTK_HWREV_8168C_SPIN2:
		case RTK_HWREV_8168CP:
		case RTK_HWREV_8168D:
		case RTK_HWREV_8168DP:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP;
			/*
			 * From FreeBSD driver:
			 *
			 * These (8168/8111) controllers support jumbo frames,
			 * but it seems that enabling them requires touching
			 * additional magic registers. Depending on MAC
			 * revisions some controllers need to disable
			 * checksum offload. So disable jumbo frames until
			 * I have a better idea of what it really requires
			 * to make them work.
			 * RTL8168C/CP : supports up to 6KB jumbo frame.
			 * RTL8111C/CP : supports up to 9KB jumbo frame.
			 */
			sc->sc_quirk |= RTKQ_NOJUMBO;
			break;
		case RTK_HWREV_8168E:
		case RTK_HWREV_8168H_SPIN1:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP | RTKQ_PHYWAKE_PM |
			    RTKQ_NOJUMBO;
			break;
		case RTK_HWREV_8168H:
		case RTK_HWREV_8168FP:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP | RTKQ_PHYWAKE_PM |
			    RTKQ_NOJUMBO | RTKQ_RXDV_GATED | RTKQ_TXRXEN_LATER;
			break;
		case RTK_HWREV_8168E_VL:
		case RTK_HWREV_8168F:
		case RTK_HWREV_8411:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP | RTKQ_NOJUMBO;
			break;
		case RTK_HWREV_8168EP:
		case RTK_HWREV_8168G:
		case RTK_HWREV_8168G_SPIN1:
		case RTK_HWREV_8168G_SPIN2:
		case RTK_HWREV_8168G_SPIN4:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP | RTKQ_NOJUMBO |
			    RTKQ_RXDV_GATED;
			break;
		case RTK_HWREV_8100E:
		case RTK_HWREV_8100E_SPIN2:
		case RTK_HWREV_8101E:
			sc->sc_quirk |= RTKQ_NOJUMBO;
			break;
		case RTK_HWREV_8102E:
		case RTK_HWREV_8102EL:
		case RTK_HWREV_8102EL_SPIN1:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP | RTKQ_NOJUMBO;
			break;
		case RTK_HWREV_8103E:
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD |
			    RTKQ_MACSTAT | RTKQ_CMDSTOP;
			break;
		case RTK_HWREV_8401E:
		case RTK_HWREV_8105E:
		case RTK_HWREV_8105E_SPIN1:
		case RTK_HWREV_8106E:
			sc->sc_quirk |= RTKQ_PHYWAKE_PM |
			    RTKQ_DESCV2 | RTKQ_NOEECMD | RTKQ_MACSTAT |
			    RTKQ_CMDSTOP;
			break;
		case RTK_HWREV_8402:
			sc->sc_quirk |= RTKQ_PHYWAKE_PM |
			    RTKQ_DESCV2 | RTKQ_NOEECMD | RTKQ_MACSTAT |
			    RTKQ_CMDSTOP; /* CMDSTOP_WAIT_TXQ */
			break;
		default:
			aprint_normal_dev(sc->sc_dev,
			    "Unknown revision (0x%08x)\n", hwrev);
			/* assume the latest features */
			sc->sc_quirk |= RTKQ_DESCV2 | RTKQ_NOEECMD;
			sc->sc_quirk |= RTKQ_NOJUMBO;
		}

		/* Set RX length mask */
		sc->re_rxlenmask = RE_RDESC_STAT_GFRAGLEN;
		sc->re_ldata.re_tx_desc_cnt = RE_TX_DESC_CNT_8169;
	} else {
		sc->sc_quirk |= RTKQ_NOJUMBO;

		/* Set RX length mask */
		sc->re_rxlenmask = RE_RDESC_STAT_FRAGLEN;
		sc->re_ldata.re_tx_desc_cnt = RE_TX_DESC_CNT_8139;
	}

	/* Reset the adapter. */
	re_reset(sc);

	/*
	 * RTL81x9 chips automatically read the EEPROM to initialize the
	 * MAC address, and some NAS boxes override the MAC address with
	 * their own configuration, so there is no need to explicitly read
	 * the EEPROM and set the ID registers.
	 */
#ifdef RE_USE_EECMD
	if ((sc->sc_quirk & RTKQ_NOEECMD) != 0) {
		/*
		 * Get station address from ID registers.
		 */
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			eaddr[i] = CSR_READ_1(sc, RTK_IDR0 + i);
	} else {
		uint16_t val;
		int addr_len;

		/*
		 * Determine the EEPROM address width.
		 */
		if (rtk_read_eeprom(sc, RTK_EE_ID, RTK_EEADDR_LEN1) == 0x8129)
			addr_len = RTK_EEADDR_LEN1;
		else
			addr_len = RTK_EEADDR_LEN0;

		/*
		 * Get station address from the EEPROM.
		 */
		for (i = 0; i < ETHER_ADDR_LEN / 2; i++) {
			val = rtk_read_eeprom(sc, RTK_EE_EADDR0 + i, addr_len);
			eaddr[(i * 2) + 0] = val & 0xff;
			eaddr[(i * 2) + 1] = val >> 8;
		}
	}
#else
	/*
	 * Get station address from ID registers.
	 */
	for (i = 0; i < ETHER_ADDR_LEN; i++)
		eaddr[i] = CSR_READ_1(sc, RTK_IDR0 + i);
#endif

	/* Take PHY out of power down mode. */
	if ((sc->sc_quirk & RTKQ_PHYWAKE_PM) != 0)
		CSR_WRITE_1(sc, RTK_PMCH, CSR_READ_1(sc, RTK_PMCH) | 0x80);

	aprint_normal_dev(sc->sc_dev, "Ethernet address %s\n",
	    ether_sprintf(eaddr));

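	/*
	 * Clamp the descriptor count so the TX ring fits in a single
	 * page, matching the single-segment bus_dmamem_alloc() below.
	 */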
	if (sc->re_ldata.re_tx_desc_cnt >
	    PAGE_SIZE / sizeof(struct re_desc)) {
		sc->re_ldata.re_tx_desc_cnt =
		    PAGE_SIZE / sizeof(struct re_desc);
	}

	aprint_verbose_dev(sc->sc_dev, "using %d tx descriptors\n",
	    sc->re_ldata.re_tx_desc_cnt);
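	/*
	 * The ring index arithmetic assumes that incrementing past the
	 * last descriptor wraps back to 0; verify that for this size.
	 */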
	KASSERT(RE_NEXT_TX_DESC(sc, RE_TX_DESC_CNT(sc) - 1) == 0);

	/* Allocate DMA'able memory for the TX ring */
	if ((error = bus_dmamem_alloc(sc->sc_dmat, RE_TX_LIST_SZ(sc),
	    RE_RING_ALIGN, 0, &sc->re_ldata.re_tx_listseg, 1,
	    &sc->re_ldata.re_tx_listnseg, BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't allocate tx listseg, error = %d\n", error);
		goto fail_0;
	}

	/* Load the map for the TX ring. */
	if ((error = bus_dmamem_map(sc->sc_dmat, &sc->re_ldata.re_tx_listseg,
	    sc->re_ldata.re_tx_listnseg, RE_TX_LIST_SZ(sc),
	    (void **)&sc->re_ldata.re_tx_list,
	    BUS_DMA_COHERENT | BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't map tx list, error = %d\n", error);
		goto fail_1;
	}
	memset(sc->re_ldata.re_tx_list, 0, RE_TX_LIST_SZ(sc));

	if ((error = bus_dmamap_create(sc->sc_dmat, RE_TX_LIST_SZ(sc), 1,
	    RE_TX_LIST_SZ(sc), 0, 0,
	    &sc->re_ldata.re_tx_list_map)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't create tx list map, error = %d\n", error);
		goto fail_2;
	}

	if ((error = bus_dmamap_load(sc->sc_dmat,
	    sc->re_ldata.re_tx_list_map, sc->re_ldata.re_tx_list,
	    RE_TX_LIST_SZ(sc), NULL, BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't load tx list, error = %d\n", error);
		goto fail_3;
	}

	/* Create DMA maps for TX buffers */
	for (i = 0; i < RE_TX_QLEN; i++) {
		error = bus_dmamap_create(sc->sc_dmat,
		    round_page(IP_MAXPACKET),
		    RE_TX_DESC_CNT(sc), RE_TDESC_CMD_FRAGLEN,
		    0, 0, &sc->re_ldata.re_txq[i].txq_dmamap);
		if (error) {
			aprint_error_dev(sc->sc_dev,
			    "can't create DMA map for TX\n");
			goto fail_4;
		}
	}

	/* Allocate DMA'able memory for the RX ring */
	/* XXX see also a comment about RE_RX_DMAMEM_SZ in rtl81x9var.h */
	if ((error = bus_dmamem_alloc(sc->sc_dmat,
	    RE_RX_DMAMEM_SZ, RE_RING_ALIGN, 0, &sc->re_ldata.re_rx_listseg, 1,
	    &sc->re_ldata.re_rx_listnseg, BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't allocate rx listseg, error = %d\n", error);
		goto fail_4;
	}

	/* Load the map for the RX ring. */
	if ((error = bus_dmamem_map(sc->sc_dmat, &sc->re_ldata.re_rx_listseg,
	    sc->re_ldata.re_rx_listnseg, RE_RX_DMAMEM_SZ,
	    (void **)&sc->re_ldata.re_rx_list,
	    BUS_DMA_COHERENT | BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't map rx list, error = %d\n", error);
		goto fail_5;
	}
	memset(sc->re_ldata.re_rx_list, 0, RE_RX_DMAMEM_SZ);

	if ((error = bus_dmamap_create(sc->sc_dmat,
	    RE_RX_DMAMEM_SZ, 1, RE_RX_DMAMEM_SZ, 0, 0,
	    &sc->re_ldata.re_rx_list_map)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't create rx list map, error = %d\n", error);
		goto fail_6;
	}

	if ((error = bus_dmamap_load(sc->sc_dmat,
	    sc->re_ldata.re_rx_list_map, sc->re_ldata.re_rx_list,
	    RE_RX_DMAMEM_SZ, NULL, BUS_DMA_NOWAIT)) != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't load rx list, error = %d\n", error);
		goto fail_7;
	}

	/* Create DMA maps for RX buffers */
	for (i = 0; i < RE_RX_DESC_CNT; i++) {
		error = bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1, MCLBYTES,
		    0, 0, &sc->re_ldata.re_rxsoft[i].rxs_dmamap);
		if (error) {
			aprint_error_dev(sc->sc_dev,
			    "can't create DMA map for RX\n");
			goto fail_8;
		}
	}

	/*
	 * Record interface as attached. From here, we should not fail.
	 */
	sc->sc_flags |= RTK_ATTACHED;

	ifp = &sc->ethercom.ec_if;
	ifp->if_softc = sc;
	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = re_ioctl;
	sc->ethercom.ec_capabilities |=
	    ETHERCAP_VLAN_MTU | ETHERCAP_VLAN_HWTAGGING;
	ifp->if_start = re_start;
	ifp->if_stop = re_stop;

	/*
	 * IFCAP_CSUM_IPv4_Tx on re(4) is broken for small packets,
	 * so we have a workaround to handle the bug by padding
	 * such packets manually.
	 */
	ifp->if_capabilities |=
	    IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
	    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
	    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
	    IFCAP_TSOv4;

	ifp->if_watchdog = re_watchdog;
	ifp->if_init = re_init;
	ifp->if_snd.ifq_maxlen = RE_IFQ_MAXLEN;
	ifp->if_capenable = ifp->if_capabilities;
	IFQ_SET_READY(&ifp->if_snd);

	callout_init(&sc->rtk_tick_ch, 0);
	callout_setfunc(&sc->rtk_tick_ch, re_tick, sc);

	/* Do MII setup */
	mii->mii_ifp = ifp;
	mii->mii_readreg = re_miibus_readreg;
	mii->mii_writereg = re_miibus_writereg;
	mii->mii_statchg = re_miibus_statchg;
	sc->ethercom.ec_mii = mii;
	ifmedia_init(&mii->mii_media, IFM_IMASK, ether_mediachange,
	    ether_mediastatus);
	mii_attach(sc->sc_dev, mii, 0xffffffff, MII_PHY_ANY,
	    MII_OFFSET_ANY, 0);
	ifmedia_set(&mii->mii_media, IFM_ETHER | IFM_AUTO);

	/*
	 * Call MI attach routine.
	 */
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, eaddr);

	rnd_attach_source(&sc->rnd_source, device_xname(sc->sc_dev),
	    RND_TYPE_NET, RND_FLAG_DEFAULT);

	if (pmf_device_register(sc->sc_dev, NULL, NULL))
		pmf_class_network_register(sc->sc_dev, ifp);
	else
		aprint_error_dev(sc->sc_dev,
		    "couldn't establish power handler\n");

	return;

 fail_8:
	/* Destroy DMA maps for RX buffers. */
	for (i = 0; i < RE_RX_DESC_CNT; i++)
		if (sc->re_ldata.re_rxsoft[i].rxs_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->re_ldata.re_rxsoft[i].rxs_dmamap);

	/* Free DMA'able memory for the RX ring. */
	bus_dmamap_unload(sc->sc_dmat, sc->re_ldata.re_rx_list_map);
 fail_7:
	bus_dmamap_destroy(sc->sc_dmat, sc->re_ldata.re_rx_list_map);
 fail_6:
	bus_dmamem_unmap(sc->sc_dmat,
	    (void *)sc->re_ldata.re_rx_list, RE_RX_DMAMEM_SZ);
 fail_5:
	bus_dmamem_free(sc->sc_dmat,
	    &sc->re_ldata.re_rx_listseg, sc->re_ldata.re_rx_listnseg);

 fail_4:
	/* Destroy DMA maps for TX buffers. */
	for (i = 0; i < RE_TX_QLEN; i++)
		if (sc->re_ldata.re_txq[i].txq_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->re_ldata.re_txq[i].txq_dmamap);

	/* Free DMA'able memory for the TX ring. */
	bus_dmamap_unload(sc->sc_dmat, sc->re_ldata.re_tx_list_map);
 fail_3:
	bus_dmamap_destroy(sc->sc_dmat, sc->re_ldata.re_tx_list_map);
 fail_2:
	bus_dmamem_unmap(sc->sc_dmat,
	    (void *)sc->re_ldata.re_tx_list, RE_TX_LIST_SZ(sc));
 fail_1:
	bus_dmamem_free(sc->sc_dmat,
	    &sc->re_ldata.re_tx_listseg, sc->re_ldata.re_tx_listnseg);
 fail_0:
	return;
}


/*
 * re_activate:
 *     Handle device activation/deactivation requests.
 */
int
re_activate(device_t self, enum devact act)
{
	struct rtk_softc *sc = device_private(self);

	switch (act) {
	case DVACT_DEACTIVATE:
		if_deactivate(&sc->ethercom.ec_if);
		return 0;
	default:
		return EOPNOTSUPP;
	}
}

/*
 * re_detach:
 *     Detach a rtk interface.
 */
int
re_detach(struct rtk_softc *sc)
{
	struct ifnet *ifp = &sc->ethercom.ec_if;
	int i;

	/*
	 * Succeed now if there isn't any work to do.
	 */
	if ((sc->sc_flags & RTK_ATTACHED) == 0)
		return 0;

	/* Unhook our tick handler. */
	callout_stop(&sc->rtk_tick_ch);

	/* Detach all PHYs. */
	mii_detach(&sc->mii, MII_PHY_ANY, MII_OFFSET_ANY);

	rnd_detach_source(&sc->rnd_source);
	ether_ifdetach(ifp);
	if_detach(ifp);

	/* Delete all remaining media. */
	ifmedia_fini(&sc->mii.mii_media);

	/* Destroy DMA maps for RX buffers. */
	for (i = 0; i < RE_RX_DESC_CNT; i++)
		if (sc->re_ldata.re_rxsoft[i].rxs_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->re_ldata.re_rxsoft[i].rxs_dmamap);

	/* Free DMA'able memory for the RX ring. */
	bus_dmamap_unload(sc->sc_dmat, sc->re_ldata.re_rx_list_map);
	bus_dmamap_destroy(sc->sc_dmat, sc->re_ldata.re_rx_list_map);
	bus_dmamem_unmap(sc->sc_dmat,
	    (void *)sc->re_ldata.re_rx_list, RE_RX_DMAMEM_SZ);
	bus_dmamem_free(sc->sc_dmat,
	    &sc->re_ldata.re_rx_listseg, sc->re_ldata.re_rx_listnseg);

	/* Destroy DMA maps for TX buffers. */
	for (i = 0; i < RE_TX_QLEN; i++)
		if (sc->re_ldata.re_txq[i].txq_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->re_ldata.re_txq[i].txq_dmamap);

	/* Free DMA'able memory for the TX ring. */
	bus_dmamap_unload(sc->sc_dmat, sc->re_ldata.re_tx_list_map);
	bus_dmamap_destroy(sc->sc_dmat, sc->re_ldata.re_tx_list_map);
	bus_dmamem_unmap(sc->sc_dmat,
	    (void *)sc->re_ldata.re_tx_list, RE_TX_LIST_SZ(sc));
	bus_dmamem_free(sc->sc_dmat,
	    &sc->re_ldata.re_tx_listseg, sc->re_ldata.re_tx_listnseg);

	pmf_device_deregister(sc->sc_dev);

	/* we don't want to run again */
	sc->sc_flags &= ~RTK_ATTACHED;

	return 0;
}

/*
 * re_enable:
 *     Enable the RTL81X9 chip.
 */
static int
re_enable(struct rtk_softc *sc)
{

	if (RTK_IS_ENABLED(sc) == 0 && sc->sc_enable != NULL) {
		if ((*sc->sc_enable)(sc) != 0) {
			printf("%s: device enable failed\n",
			    device_xname(sc->sc_dev));
			return EIO;
		}
		sc->sc_flags |= RTK_ENABLED;
	}
	return 0;
}

/*
 * re_disable:
 *     Disable the RTL81X9 chip.
 */
static void
re_disable(struct rtk_softc *sc)
{

	if (RTK_IS_ENABLED(sc) && sc->sc_disable != NULL) {
		(*sc->sc_disable)(sc);
		sc->sc_flags &= ~RTK_ENABLED;
	}
}

static int
re_newbuf(struct rtk_softc *sc, int idx, struct mbuf *m)
{
	struct mbuf *n = NULL;
	bus_dmamap_t map;
	struct re_desc *d;
	struct re_rxsoft *rxs;
	uint32_t cmdstat;
	int error;

	if (m == NULL) {
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n == NULL)
			return ENOBUFS;

		MCLAIM(n, &sc->ethercom.ec_rx_mowner);
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			m_freem(n);
			return ENOBUFS;
		}
		m = n;
	} else
		m->m_data = m->m_ext.ext_buf;

	/*
	 * Initialize mbuf length fields and fixup
	 * alignment so that the frame payload is
	 * longword aligned.
	 */
	m->m_len = m->m_pkthdr.len = MCLBYTES - RE_ETHER_ALIGN;
	m->m_data += RE_ETHER_ALIGN;
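	/*
	 * With the usual RE_ETHER_ALIGN of 2, the 14-byte Ethernet
	 * header ends at offset 16, leaving the IP header 4-byte
	 * aligned.
	 */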

	rxs = &sc->re_ldata.re_rxsoft[idx];
	map = rxs->rxs_dmamap;
	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
	    BUS_DMA_READ|BUS_DMA_NOWAIT);

	if (error)
		goto out;

	bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
	    BUS_DMASYNC_PREREAD);

	d = &sc->re_ldata.re_rx_list[idx];
#ifdef DIAGNOSTIC
	RE_RXDESCSYNC(sc, idx, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
	cmdstat = le32toh(d->re_cmdstat);
	RE_RXDESCSYNC(sc, idx, BUS_DMASYNC_PREREAD);
	if (cmdstat & RE_RDESC_STAT_OWN) {
		panic("%s: tried to map busy RX descriptor",
		    device_xname(sc->sc_dev));
	}
#endif

	rxs->rxs_mbuf = m;

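	/*
	 * Update the descriptor in two steps: first write the buffer
	 * address and length with OWN clear and sync, then set OWN and
	 * sync again, so the chip never sees a half-updated descriptor.
	 */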
	d->re_vlanctl = 0;
	cmdstat = map->dm_segs[0].ds_len;
	if (idx == (RE_RX_DESC_CNT - 1))
		cmdstat |= RE_RDESC_CMD_EOR;
	re_set_bufaddr(d, map->dm_segs[0].ds_addr);
	d->re_cmdstat = htole32(cmdstat);
	RE_RXDESCSYNC(sc, idx, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
	cmdstat |= RE_RDESC_CMD_OWN;
	d->re_cmdstat = htole32(cmdstat);
	RE_RXDESCSYNC(sc, idx, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

	return 0;
 out:
	if (n != NULL)
		m_freem(n);
	return ENOMEM;
}

static int
re_tx_list_init(struct rtk_softc *sc)
{
	int i;

	memset(sc->re_ldata.re_tx_list, 0, RE_TX_LIST_SZ(sc));
	for (i = 0; i < RE_TX_QLEN; i++) {
		sc->re_ldata.re_txq[i].txq_mbuf = NULL;
	}

	bus_dmamap_sync(sc->sc_dmat,
	    sc->re_ldata.re_tx_list_map, 0,
	    sc->re_ldata.re_tx_list_map->dm_mapsize,
	    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
	sc->re_ldata.re_txq_prodidx = 0;
	sc->re_ldata.re_txq_considx = 0;
	sc->re_ldata.re_txq_free = RE_TX_QLEN;
	sc->re_ldata.re_tx_free = RE_TX_DESC_CNT(sc);
	sc->re_ldata.re_tx_nextfree = 0;

	return 0;
}

static int
re_rx_list_init(struct rtk_softc *sc)
{
	int i;

	memset(sc->re_ldata.re_rx_list, 0, RE_RX_LIST_SZ);

	for (i = 0; i < RE_RX_DESC_CNT; i++) {
		if (re_newbuf(sc, i, NULL) == ENOBUFS)
			return ENOBUFS;
	}

	sc->re_ldata.re_rx_prodidx = 0;
	sc->re_head = sc->re_tail = NULL;

	return 0;
}

/*
 * RX handler for C+ and 8169. For the gigE chips, we support
 * the reception of jumbo frames that have been fragmented
 * across multiple 2K mbuf cluster buffers.
 */
static void
re_rxeof(struct rtk_softc *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;
	int i, total_len;
	struct re_desc *cur_rx;
	struct re_rxsoft *rxs;
	uint32_t rxstat, rxvlan;

	ifp = &sc->ethercom.ec_if;

	for (i = sc->re_ldata.re_rx_prodidx;; i = RE_NEXT_RX_DESC(sc, i)) {
		cur_rx = &sc->re_ldata.re_rx_list[i];
		RE_RXDESCSYNC(sc, i,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
		rxstat = le32toh(cur_rx->re_cmdstat);
		rxvlan = le32toh(cur_rx->re_vlanctl);
		RE_RXDESCSYNC(sc, i, BUS_DMASYNC_PREREAD);
		if ((rxstat & RE_RDESC_STAT_OWN) != 0) {
			break;
		}
		total_len = rxstat & sc->re_rxlenmask;
		rxs = &sc->re_ldata.re_rxsoft[i];
		m = rxs->rxs_mbuf;

		/* Invalidate the RX mbuf and unload its map */

		bus_dmamap_sync(sc->sc_dmat,
		    rxs->rxs_dmamap, 0, rxs->rxs_dmamap->dm_mapsize,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->sc_dmat, rxs->rxs_dmamap);

		if ((rxstat & RE_RDESC_STAT_EOF) == 0) {
			m->m_len = MCLBYTES - RE_ETHER_ALIGN;
			if (sc->re_head == NULL)
				sc->re_head = sc->re_tail = m;
			else {
				m_remove_pkthdr(m);
				sc->re_tail->m_next = m;
				sc->re_tail = m;
			}
			re_newbuf(sc, i, NULL);
			continue;
		}

		/*
		 * NOTE: for the 8139C+, the frame length field
		 * is always 12 bits in size, but for the gigE chips,
		 * it is 13 bits (since the max RX frame length is 16K).
		 * Unfortunately, all 32 bits in the status word
		 * were already used, so to make room for the extra
		 * length bit, RealTek took out the 'frame alignment
		 * error' bit and shifted the other status bits
		 * over one slot. The OWN, EOR, FS and LS bits are
		 * still in the same places. We have already extracted
		 * the frame length and checked the OWN bit, so rather
		 * than using an alternate bit mapping, we shift the
		 * status bits one space to the right so we can evaluate
		 * them using the 8169 status as though it was in the
		 * same format as that of the 8139C+.
		 */
		if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0)
			rxstat >>= 1;

		if (__predict_false((rxstat & RE_RDESC_STAT_RXERRSUM) != 0)) {
#ifdef RE_DEBUG
			printf("%s: RX error (rxstat = 0x%08x)",
			    device_xname(sc->sc_dev), rxstat);
			if (rxstat & RE_RDESC_STAT_FRALIGN)
				printf(", frame alignment error");
			if (rxstat & RE_RDESC_STAT_BUFOFLOW)
				printf(", out of buffer space");
			if (rxstat & RE_RDESC_STAT_FIFOOFLOW)
				printf(", FIFO overrun");
			if (rxstat & RE_RDESC_STAT_GIANT)
				printf(", giant packet");
			if (rxstat & RE_RDESC_STAT_RUNT)
				printf(", runt packet");
			if (rxstat & RE_RDESC_STAT_CRCERR)
				printf(", CRC error");
			printf("\n");
#endif
			if_statinc(ifp, if_ierrors);
			/*
			 * If this is part of a multi-fragment packet,
			 * discard all the pieces.
			 */
			if (sc->re_head != NULL) {
				m_freem(sc->re_head);
				sc->re_head = sc->re_tail = NULL;
			}
			re_newbuf(sc, i, m);
			continue;
		}

		/*
		 * If allocating a replacement mbuf fails,
		 * reload the current one.
		 */

		if (__predict_false(re_newbuf(sc, i, NULL) != 0)) {
			if_statinc(ifp, if_ierrors);
			if (sc->re_head != NULL) {
				m_freem(sc->re_head);
				sc->re_head = sc->re_tail = NULL;
			}
			re_newbuf(sc, i, m);
			continue;
		}

		if (sc->re_head != NULL) {
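			/*
			 * Only the last descriptor of a fragmented frame
			 * carries the full frame length; every earlier
			 * fragment filled a whole (MCLBYTES -
			 * RE_ETHER_ALIGN) buffer, so the tail fragment's
			 * length is the remainder.
			 */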
			m->m_len = total_len % (MCLBYTES - RE_ETHER_ALIGN);
			/*
			 * Special case: if there's 4 bytes or less
			 * in this buffer, the mbuf can be discarded:
			 * the last 4 bytes is the CRC, which we don't
			 * care about anyway.
			 */
			if (m->m_len <= ETHER_CRC_LEN) {
				sc->re_tail->m_len -=
				    (ETHER_CRC_LEN - m->m_len);
				m_freem(m);
			} else {
				m->m_len -= ETHER_CRC_LEN;
				m_remove_pkthdr(m);
				sc->re_tail->m_next = m;
			}
			m = sc->re_head;
			sc->re_head = sc->re_tail = NULL;
			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
		} else
			m->m_pkthdr.len = m->m_len =
			    (total_len - ETHER_CRC_LEN);

		m_set_rcvif(m, ifp);

		/* Do RX checksumming */
		if ((sc->sc_quirk & RTKQ_DESCV2) == 0) {
			/* Check IP header checksum */
			if ((rxstat & RE_RDESC_STAT_PROTOID) != 0) {
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
				if (rxstat & RE_RDESC_STAT_IPSUMBAD)
					m->m_pkthdr.csum_flags |=
					    M_CSUM_IPv4_BAD;

				/* Check TCP/UDP checksum */
				if (RE_TCPPKT(rxstat)) {
					m->m_pkthdr.csum_flags |= M_CSUM_TCPv4;
					if (rxstat & RE_RDESC_STAT_TCPSUMBAD)
						m->m_pkthdr.csum_flags |=
						    M_CSUM_TCP_UDP_BAD;
				} else if (RE_UDPPKT(rxstat)) {
					m->m_pkthdr.csum_flags |= M_CSUM_UDPv4;
					if (rxstat & RE_RDESC_STAT_UDPSUMBAD) {
						/*
						 * XXX: 8139C+ thinks UDP csum
						 * 0xFFFF is bad, force software
						 * calculation.
						 */
						if (sc->sc_quirk & RTKQ_8139CPLUS)
							m->m_pkthdr.csum_flags
							    &= ~M_CSUM_UDPv4;
						else
							m->m_pkthdr.csum_flags
							    |= M_CSUM_TCP_UDP_BAD;
					}
				}
			}
		} else {
			/* Check IPv4 header checksum */
			if ((rxvlan & RE_RDESC_VLANCTL_IPV4) != 0) {
				m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
				if (rxstat & RE_RDESC_STAT_IPSUMBAD)
					m->m_pkthdr.csum_flags |=
					    M_CSUM_IPv4_BAD;

				/* Check TCPv4/UDPv4 checksum */
				if (RE_TCPPKT(rxstat)) {
					m->m_pkthdr.csum_flags |= M_CSUM_TCPv4;
					if (rxstat & RE_RDESC_STAT_TCPSUMBAD)
						m->m_pkthdr.csum_flags |=
						    M_CSUM_TCP_UDP_BAD;
				} else if (RE_UDPPKT(rxstat)) {
					m->m_pkthdr.csum_flags |= M_CSUM_UDPv4;
					if (rxstat & RE_RDESC_STAT_UDPSUMBAD)
						m->m_pkthdr.csum_flags |=
						    M_CSUM_TCP_UDP_BAD;
				}
			}
			/* XXX Check TCPv6/UDPv6 checksum? */
		}

		if (rxvlan & RE_RDESC_VLANCTL_TAG) {
			vlan_set_tag(m,
			     bswap16(rxvlan & RE_RDESC_VLANCTL_DATA));
		}
		if_percpuq_enqueue(ifp->if_percpuq, m);
	}

	sc->re_ldata.re_rx_prodidx = i;
}

static void
re_txeof(struct rtk_softc *sc)
{
	struct ifnet *ifp;
	struct re_txq *txq;
	uint32_t txstat;
	int idx, descidx;

	ifp = &sc->ethercom.ec_if;

	for (idx = sc->re_ldata.re_txq_considx;
	    sc->re_ldata.re_txq_free < RE_TX_QLEN;
	    idx = RE_NEXT_TXQ(sc, idx), sc->re_ldata.re_txq_free++) {
		txq = &sc->re_ldata.re_txq[idx];
		KASSERT(txq->txq_mbuf != NULL);

		descidx = txq->txq_descidx;
		RE_TXDESCSYNC(sc, descidx,
		    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
		txstat =
		    le32toh(sc->re_ldata.re_tx_list[descidx].re_cmdstat);
		RE_TXDESCSYNC(sc, descidx, BUS_DMASYNC_PREREAD);
		KASSERT((txstat & RE_TDESC_CMD_EOF) != 0);
		if (txstat & RE_TDESC_CMD_OWN) {
			break;
		}

		sc->re_ldata.re_tx_free += txq->txq_nsegs;
		KASSERT(sc->re_ldata.re_tx_free <= RE_TX_DESC_CNT(sc));
		bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap,
		    0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap);
		m_freem(txq->txq_mbuf);
		txq->txq_mbuf = NULL;

		net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
		if (txstat & (RE_TDESC_STAT_EXCESSCOL | RE_TDESC_STAT_COLCNT))
			if_statinc_ref(nsr, if_collisions);
		if (txstat & RE_TDESC_STAT_TXERRSUM)
			if_statinc_ref(nsr, if_oerrors);
		else
			if_statinc_ref(nsr, if_opackets);
		IF_STAT_PUTREF(ifp);
	}

	sc->re_ldata.re_txq_considx = idx;

	if (sc->re_ldata.re_txq_free > RE_NTXDESC_RSVD)
		ifp->if_flags &= ~IFF_OACTIVE;

	/*
	 * If not all descriptors have been reaped yet,
	 * reload the timer so that we will eventually get another
	 * interrupt that will cause us to re-enter this routine.
	 * This is done in case the transmitter has gone idle.
	 */
	if (sc->re_ldata.re_txq_free < RE_TX_QLEN) {
		if ((sc->sc_quirk & RTKQ_IM_HW) == 0)
			CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
		if ((sc->sc_quirk & RTKQ_PCIE) != 0) {
			/*
			 * Some chips will ignore a second TX request
			 * issued while an existing transmission is in
			 * progress. If the transmitter goes idle but
			 * there are still packets waiting to be sent,
			 * we need to restart the channel here to flush
			 * them out. This only seems to be required with
			 * the PCIe devices.
			 */
			CSR_WRITE_1(sc, RTK_GTXSTART, RTK_TXSTART_START);
		}
	} else
		ifp->if_timer = 0;
}

static void
re_tick(void *arg)
{
	struct rtk_softc *sc = arg;
	int s;

	/* XXX: just return for 8169S/8110S with rev 2 or newer phy */
	s = splnet();

	mii_tick(&sc->mii);
	splx(s);

	callout_schedule(&sc->rtk_tick_ch, hz);
}

int
re_intr(void *arg)
{
	struct rtk_softc *sc = arg;
	struct ifnet *ifp;
	uint16_t status;
	int handled = 0;

	if (!device_has_power(sc->sc_dev))
		return 0;

	ifp = &sc->ethercom.ec_if;

	if ((ifp->if_flags & IFF_UP) == 0)
		return 0;

	const uint16_t status_mask = (sc->sc_quirk & RTKQ_IM_HW) ?
	    RTK_INTRS_IM_HW : RTK_INTRS_CPLUS;

	for (;;) {

		status = CSR_READ_2(sc, RTK_ISR);
		/* If the card has gone away the read returns 0xffff. */
		if (status == 0xffff)
			break;
		if (status) {
			handled = 1;
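			/* ISR bits are write-1-to-clear; ack what we saw. */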
			CSR_WRITE_2(sc, RTK_ISR, status);
		}

		if ((status & status_mask) == 0)
			break;

		if (status & (RTK_ISR_RX_OK | RTK_ISR_RX_ERR))
			re_rxeof(sc);

		if (status & (RTK_ISR_TIMEOUT_EXPIRED | RTK_ISR_TX_ERR |
		    RTK_ISR_TX_DESC_UNAVAIL | RTK_ISR_TX_OK))
			re_txeof(sc);

		if (status & RTK_ISR_SYSTEM_ERR) {
			re_init(ifp);
		}

		if (status & RTK_ISR_LINKCHG) {
			callout_stop(&sc->rtk_tick_ch);
			re_tick(sc);
		}
	}

	if (handled)
		if_schedule_deferred_start(ifp);

	rnd_add_uint32(&sc->rnd_source, status);

	return handled;
}


/*
 * Main transmit routine for C+ and gigE NICs.
 */

static void
re_start(struct ifnet *ifp)
{
	struct rtk_softc *sc;
	struct mbuf *m;
	bus_dmamap_t map;
	struct re_txq *txq;
	struct re_desc *d;
	uint32_t cmdstat, re_flags, vlanctl;
	int ofree, idx, error, nsegs, seg;
	int startdesc, curdesc, lastdesc;
	bool pad;

	sc = ifp->if_softc;
	ofree = sc->re_ldata.re_txq_free;

	for (idx = sc->re_ldata.re_txq_prodidx;; idx = RE_NEXT_TXQ(sc, idx)) {

		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;

		if (sc->re_ldata.re_txq_free == 0 ||
		    sc->re_ldata.re_tx_free == 0) {
			/* no more free slots left */
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}

		/*
		 * Set up checksum offload. Note: checksum offload bits must
		 * appear in all descriptors of a multi-descriptor transmit
		 * attempt. (This is according to testing done with an 8169
		 * chip. I'm not sure if this is a requirement or a bug.)
		 */

		vlanctl = 0;
		if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
			uint32_t segsz = m->m_pkthdr.segsz;

			if ((sc->sc_quirk & RTKQ_DESCV2) == 0) {
				re_flags = RE_TDESC_CMD_LGSEND |
				    (segsz << RE_TDESC_CMD_MSSVAL_SHIFT);
			} else {
				re_flags = RE_TDESC_CMD_LGSEND_V4;
				vlanctl |=
				    (segsz << RE_TDESC_VLANCTL_MSSVAL_SHIFT);
			}
		} else {
			/*
			 * Set RE_TDESC_CMD_IPCSUM if any checksum offloading
			 * is requested; otherwise RE_TDESC_CMD_TCPCSUM/
			 * RE_TDESC_CMD_UDPCSUM have no effect.
			 */
			re_flags = 0;
			if ((m->m_pkthdr.csum_flags &
			    (M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_UDPv4))
			    != 0) {
				if ((sc->sc_quirk & RTKQ_DESCV2) == 0) {
					re_flags |= RE_TDESC_CMD_IPCSUM;
					if (m->m_pkthdr.csum_flags &
					    M_CSUM_TCPv4) {
						re_flags |=
						    RE_TDESC_CMD_TCPCSUM;
					} else if (m->m_pkthdr.csum_flags &
					    M_CSUM_UDPv4) {
						re_flags |=
						    RE_TDESC_CMD_UDPCSUM;
					}
				} else {
					vlanctl |= RE_TDESC_VLANCTL_IPCSUM;
					if (m->m_pkthdr.csum_flags &
					    M_CSUM_TCPv4) {
						vlanctl |=
						    RE_TDESC_VLANCTL_TCPCSUM;
					} else if (m->m_pkthdr.csum_flags &
					    M_CSUM_UDPv4) {
						vlanctl |=
						    RE_TDESC_VLANCTL_UDPCSUM;
					}
				}
			}
		}

		txq = &sc->re_ldata.re_txq[idx];
		map = txq->txq_dmamap;
		error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT);

		if (__predict_false(error)) {
			/* XXX try to defrag if EFBIG? */
			printf("%s: can't map mbuf (error %d)\n",
			    device_xname(sc->sc_dev), error);

			IFQ_DEQUEUE(&ifp->if_snd, m);
			m_freem(m);
			if_statinc(ifp, if_oerrors);
			continue;
		}

		nsegs = map->dm_nsegs;
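		/*
		 * Work around the small-packet IPv4 checksum bug noted
		 * in re_attach(): frames no longer than
		 * RE_IP4CSUMTX_PADLEN get an extra descriptor pointing
		 * at a dedicated pad buffer (RE_TXPADDADDR).
		 */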
		pad = false;
		if (__predict_false(m->m_pkthdr.len <= RE_IP4CSUMTX_PADLEN &&
		    (re_flags & RE_TDESC_CMD_IPCSUM) != 0 &&
		    (sc->sc_quirk & RTKQ_DESCV2) == 0)) {
			pad = true;
			nsegs++;
		}

		if (nsegs > sc->re_ldata.re_tx_free) {
			/*
			 * Not enough free descriptors to transmit this packet.
			 */
			ifp->if_flags |= IFF_OACTIVE;
			bus_dmamap_unload(sc->sc_dmat, map);
			break;
		}

		IFQ_DEQUEUE(&ifp->if_snd, m);

		/*
		 * Make sure that the caches are synchronized before we
		 * ask the chip to start DMA for the packet data.
		 */
		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		/*
		 * Set up hardware VLAN tagging. Note: vlan tag info must
		 * appear in all descriptors of a multi-descriptor
		 * transmission attempt.
		 */
		if (vlan_has_tag(m))
			vlanctl |= bswap16(vlan_get_tag(m)) |
			    RE_TDESC_VLANCTL_TAG;

		/*
		 * Map the segment array into descriptors.
		 * Note that we set the start-of-frame and
		 * end-of-frame markers for either TX or RX,
		 * but they really only have meaning in the TX case.
		 * (In the RX case, it's the chip that tells us
		 *  where packets begin and end.)
		 * We also keep track of the end of the ring
		 * and set the end-of-ring bits as needed,
		 * and we set the ownership bits in all except
		 * the very first descriptor. (The OWN bit of the
		 * first descriptor is set last, below, once the
		 * whole chain is built, so the chip never sees a
		 * partially constructed frame.)
		 */
1689 		curdesc = startdesc = sc->re_ldata.re_tx_nextfree;
1690 		lastdesc = -1;
1691 		for (seg = 0; seg < map->dm_nsegs;
1692 		    seg++, curdesc = RE_NEXT_TX_DESC(sc, curdesc)) {
1693 			d = &sc->re_ldata.re_tx_list[curdesc];
1694 #ifdef DIAGNOSTIC
1695 			RE_TXDESCSYNC(sc, curdesc,
1696 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1697 			cmdstat = le32toh(d->re_cmdstat);
1698 			RE_TXDESCSYNC(sc, curdesc, BUS_DMASYNC_PREREAD);
1699 			if (cmdstat & RE_TDESC_STAT_OWN) {
1700 				panic("%s: tried to map busy TX descriptor",
1701 				    device_xname(sc->sc_dev));
1702 			}
1703 #endif
1704 
1705 			d->re_vlanctl = htole32(vlanctl);
1706 			re_set_bufaddr(d, map->dm_segs[seg].ds_addr);
1707 			cmdstat = re_flags | map->dm_segs[seg].ds_len;
1708 			if (seg == 0)
1709 				cmdstat |= RE_TDESC_CMD_SOF;
1710 			else
1711 				cmdstat |= RE_TDESC_CMD_OWN;
1712 			if (curdesc == (RE_TX_DESC_CNT(sc) - 1))
1713 				cmdstat |= RE_TDESC_CMD_EOR;
1714 			if (seg == nsegs - 1) {
1715 				cmdstat |= RE_TDESC_CMD_EOF;
1716 				lastdesc = curdesc;
1717 			}
1718 			d->re_cmdstat = htole32(cmdstat);
1719 			RE_TXDESCSYNC(sc, curdesc,
1720 			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1721 		}
1722 		if (__predict_false(pad)) {
1723 			d = &sc->re_ldata.re_tx_list[curdesc];
1724 			d->re_vlanctl = htole32(vlanctl);
1725 			re_set_bufaddr(d, RE_TXPADDADDR(sc));
1726 			cmdstat = re_flags |
1727 			    RE_TDESC_CMD_OWN | RE_TDESC_CMD_EOF |
1728 			    (RE_IP4CSUMTX_PADLEN + 1 - m->m_pkthdr.len);
1729 			if (curdesc == (RE_TX_DESC_CNT(sc) - 1))
1730 				cmdstat |= RE_TDESC_CMD_EOR;
1731 			d->re_cmdstat = htole32(cmdstat);
1732 			RE_TXDESCSYNC(sc, curdesc,
1733 			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1734 			lastdesc = curdesc;
1735 			curdesc = RE_NEXT_TX_DESC(sc, curdesc);
1736 		}
1737 		KASSERT(lastdesc != -1);
1738 
1739 		/* Transfer ownership of packet to the chip. */
1740 
1741 		sc->re_ldata.re_tx_list[startdesc].re_cmdstat |=
1742 		    htole32(RE_TDESC_CMD_OWN);
1743 		RE_TXDESCSYNC(sc, startdesc,
1744 		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1745 
1746 		/* update info of TX queue and descriptors */
1747 		txq->txq_mbuf = m;
1748 		txq->txq_descidx = lastdesc;
1749 		txq->txq_nsegs = nsegs;
1750 
1751 		sc->re_ldata.re_txq_free--;
1752 		sc->re_ldata.re_tx_free -= nsegs;
1753 		sc->re_ldata.re_tx_nextfree = curdesc;
1754 
1755 		/*
1756 		 * If there's a BPF listener, bounce a copy of this frame
1757 		 * to him.
1758 		 */
1759 		bpf_mtap(ifp, m, BPF_D_OUT);
1760 	}
1761 
1762 	if (sc->re_ldata.re_txq_free < ofree) {
1763 		/*
1764 		 * TX packets are enqueued.
1765 		 */
1766 		sc->re_ldata.re_txq_prodidx = idx;
1767 
1768 		/*
1769 		 * Start the transmitter to poll.
1770 		 *
1771 		 * RealTek put the TX poll request register in a different
1772 		 * location on the 8169 gigE chip. I don't know why.
1773 		 */
1774 		if ((sc->sc_quirk & RTKQ_8139CPLUS) != 0)
1775 			CSR_WRITE_1(sc, RTK_TXSTART, RTK_TXSTART_START);
1776 		else
1777 			CSR_WRITE_1(sc, RTK_GTXSTART, RTK_TXSTART_START);
1778 
1779 		if ((sc->sc_quirk & RTKQ_IM_HW) == 0) {
1780 			/*
1781 			 * Use the countdown timer for interrupt moderation.
1782 			 * 'TX done' interrupts are disabled. Instead, we reset
1783 			 * the countdown timer, which will begin counting until
1784 			 * it hits the value in the TIMERINT register, and then
1785 			 * trigger an interrupt. Each time we write to the
1786 			 * TIMERCNT register, the timer count is reset to 0.
1787 			 */
1788 			CSR_WRITE_4(sc, RTK_TIMERCNT, 1);
1789 		}
1790 
1791 		/*
1792 		 * Set a timeout in case the chip goes out to lunch.
1793 		 */
1794 		ifp->if_timer = 5;
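		/*
		 * The stack decrements if_timer once per second; if it
		 * hits zero before TX completion processing clears it,
		 * re_watchdog() below resets the interface.
		 */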
1795 	}
1796 }
1797 
1798 static int
1799 re_init(struct ifnet *ifp)
1800 {
1801 	struct rtk_softc *sc = ifp->if_softc;
1802 	uint32_t rxcfg = 0;
1803 	uint16_t cfg;
1804 	int error;
1805 #ifdef RE_USE_EECMD
1806 	const uint8_t *enaddr;
1807 	uint32_t reg;
1808 #endif
1809 
1810 	if ((error = re_enable(sc)) != 0)
1811 		goto out;
1812 
1813 	/*
1814 	 * Cancel pending I/O and free all RX/TX buffers.
1815 	 */
1816 	re_stop(ifp, 0);
1817 
1818 	re_reset(sc);
1819 
1820 	/*
1821 	 * Enable C+ RX and TX mode, as well as VLAN stripping and
1822 	 * RX checksum offload. We must configure the C+ register
1823 	 * before all others.
1824 	 */
1825 	cfg = RE_CPLUSCMD_PCI_MRW;
1826 
1827 	/*
1828 	 * XXX: For old 8169 set bit 14.
1829 	 *      For 8169S/8110S and above, do not set bit 14.
1830 	 */
1831 	if ((sc->sc_quirk & RTKQ_8169NONS) != 0)
1832 		cfg |= (0x1 << 14);
1833 
1834 	if ((sc->ethercom.ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0)
1835 		cfg |= RE_CPLUSCMD_VLANSTRIP;
1836 	if ((ifp->if_capenable & (IFCAP_CSUM_IPv4_Rx |
1837 	     IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx)) != 0)
1838 		cfg |= RE_CPLUSCMD_RXCSUM_ENB;
1839 	if ((sc->sc_quirk & RTKQ_MACSTAT) != 0) {
1840 		cfg |= RE_CPLUSCMD_MACSTAT_DIS;
1841 		cfg |= RE_CPLUSCMD_TXENB;
1842 	} else
1843 		cfg |= RE_CPLUSCMD_RXENB | RE_CPLUSCMD_TXENB;
1844 
1845 	CSR_WRITE_2(sc, RTK_CPLUS_CMD, cfg);
1846 
1847 	/* XXX: from Realtek-supplied Linux driver. Wholly undocumented. */
1848 	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
1849 		if ((sc->sc_quirk & RTKQ_IM_HW) == 0) {
1850 			CSR_WRITE_2(sc, RTK_IM, 0x0000);
1851 		} else {
1852 			CSR_WRITE_2(sc, RTK_IM, 0x5151);
1853 		}
1854 	}
1855 
1856 	DELAY(10000);
1857 
1858 #ifdef RE_USE_EECMD
1859 	/*
1860 	 * Init our MAC address.  Even though the chipset
1861 	 * documentation doesn't mention it, we need to enter "Config
1862 	 * register write enable" mode to modify the ID registers.
1863 	 */
1864 	CSR_WRITE_1(sc, RTK_EECMD, RTK_EEMODE_WRITECFG);
1865 	enaddr = CLLADDR(ifp->if_sadl);
1866 	reg = enaddr[0] | (enaddr[1] << 8) |
1867 	    (enaddr[2] << 16) | (enaddr[3] << 24);
1868 	CSR_WRITE_4(sc, RTK_IDR0, reg);
1869 	reg = enaddr[4] | (enaddr[5] << 8);
1870 	CSR_WRITE_4(sc, RTK_IDR4, reg);
1871 	CSR_WRITE_1(sc, RTK_EECMD, RTK_EEMODE_OFF);
1872 #endif
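	/*
	 * Worked example of the IDR packing above, using the made-up
	 * address 00:11:22:33:44:55: IDR0 takes 0x33221100 and IDR4
	 * takes 0x00005544, i.e. the MAC bytes in little-endian
	 * register order.
	 */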
1873 
1874 	/*
1875 	 * For C+ mode, initialize the RX descriptors and mbufs.
1876 	 */
1877 	re_rx_list_init(sc);
1878 	re_tx_list_init(sc);
1879 
1880 	/*
1881 	 * Load the addresses of the RX and TX lists into the chip.
1882 	 */
1883 	CSR_WRITE_4(sc, RTK_RXLIST_ADDR_HI,
1884 	    RE_ADDR_HI(sc->re_ldata.re_rx_list_map->dm_segs[0].ds_addr));
1885 	CSR_WRITE_4(sc, RTK_RXLIST_ADDR_LO,
1886 	    RE_ADDR_LO(sc->re_ldata.re_rx_list_map->dm_segs[0].ds_addr));
1887 
1888 	CSR_WRITE_4(sc, RTK_TXLIST_ADDR_HI,
1889 	    RE_ADDR_HI(sc->re_ldata.re_tx_list_map->dm_segs[0].ds_addr));
1890 	CSR_WRITE_4(sc, RTK_TXLIST_ADDR_LO,
1891 	    RE_ADDR_LO(sc->re_ldata.re_tx_list_map->dm_segs[0].ds_addr));
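	/*
	 * Each list base is a 64-bit bus address split across a HI/LO
	 * register pair; RE_ADDR_HI is simply 0 when the map lies in
	 * 32-bit address space.
	 */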
1892 
1893 	if (sc->sc_quirk & RTKQ_RXDV_GATED) {
1894 		CSR_WRITE_4(sc, RTK_MISC,
1895 		    CSR_READ_4(sc, RTK_MISC) & ~RTK_MISC_RXDV_GATED_EN);
1896 	}
1897 
1898 	/*
1899 	 * Enable transmit and receive.
1900 	 */
1901 	if ((sc->sc_quirk & RTKQ_TXRXEN_LATER) == 0)
1902 		CSR_WRITE_1(sc, RTK_COMMAND, RTK_CMD_TX_ENB | RTK_CMD_RX_ENB);
1903 
1904 	/*
1905 	 * Set the initial TX and RX configuration.
1906 	 */
1907 	if (sc->re_testmode && (sc->sc_quirk & RTKQ_8169NONS) != 0) {
1908 		/* Loopback test mode is needed only on the old 8169. */
1909 		CSR_WRITE_4(sc, RTK_TXCFG,
1910 		    RE_TXCFG_CONFIG | RTK_LOOPTEST_ON);
1911 	} else
1912 		CSR_WRITE_4(sc, RTK_TXCFG, RE_TXCFG_CONFIG);
1913 
1914 	CSR_WRITE_1(sc, RTK_EARLY_TX_THRESH, 16);
1915 
1916 	CSR_WRITE_4(sc, RTK_RXCFG, RE_RXCFG_CONFIG);
1917 
1918 	/* Set the individual bit to receive frames for this host only. */
1919 	rxcfg = CSR_READ_4(sc, RTK_RXCFG);
1920 	rxcfg |= RTK_RXCFG_RX_INDIV;
1921 
1922 	/* If we want promiscuous mode, set the allframes bit. */
1923 	if (ifp->if_flags & IFF_PROMISC)
1924 		rxcfg |= RTK_RXCFG_RX_ALLPHYS;
1925 	else
1926 		rxcfg &= ~RTK_RXCFG_RX_ALLPHYS;
1927 	CSR_WRITE_4(sc, RTK_RXCFG, rxcfg);
1928 
1929 	/*
1930 	 * Set capture broadcast bit to capture broadcast frames.
1931 	 */
1932 	if (ifp->if_flags & IFF_BROADCAST)
1933 		rxcfg |= RTK_RXCFG_RX_BROAD;
1934 	else
1935 		rxcfg &= ~RTK_RXCFG_RX_BROAD;
1936 	CSR_WRITE_4(sc, RTK_RXCFG, rxcfg);
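	/*
	 * rxcfg still holds the value just written, so the broadcast
	 * bit is layered on top of the unicast/promiscuous settings
	 * above rather than clobbering them.
	 */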
1937 
1938 	/*
1939 	 * Program the multicast filter, if necessary.
1940 	 */
1941 	rtk_setmulti(sc);
1942 
1943 	/*
1944 	 * Some chips require TX/RX to be enabled *AFTER* TX/RX configuration.
1945 	 */
1946 	if ((sc->sc_quirk & RTKQ_TXRXEN_LATER) != 0)
1947 		CSR_WRITE_1(sc, RTK_COMMAND, RTK_CMD_TX_ENB | RTK_CMD_RX_ENB);
1948 
1949 	/*
1950 	 * Enable interrupts.
1951 	 */
1952 	if (sc->re_testmode)
1953 		CSR_WRITE_2(sc, RTK_IMR, 0);
1954 	else if ((sc->sc_quirk & RTKQ_IM_HW) != 0)
1955 		CSR_WRITE_2(sc, RTK_IMR, RTK_INTRS_IM_HW);
1956 	else
1957 		CSR_WRITE_2(sc, RTK_IMR, RTK_INTRS_CPLUS);
1958 
1959 	/* Start RX/TX process. */
1960 	CSR_WRITE_4(sc, RTK_MISSEDPKT, 0);
1961 #ifdef notdef
1962 	/* Enable receiver and transmitter. */
1963 	CSR_WRITE_1(sc, RTK_COMMAND, RTK_CMD_TX_ENB | RTK_CMD_RX_ENB);
1964 #endif
1965 
1966 	/*
1967 	 * Initialize the timer interrupt register so that
1968 	 * a timer interrupt will be generated once the timer
1969 	 * reaches a certain number of ticks. The timer is
1970 	 * reloaded on each transmit. This gives us TX interrupt
1971 	 * moderation, which dramatically improves TX frame rate.
1972 	 */
1973 
1974 	unsigned defer;		/* timer interval, in ns */
1975 	unsigned period;	/* busclock period, in ns */
1976 
1977 	/*
1978 	 * Maximum frame rate
1979 	 * 1500 byte PDU -> 81274 Hz
1980 	 *   46 byte PDU -> 1488096 Hz
1981 	 *
1982 	 * Deferring interrupts by up to 128us needs descriptors for
1983 	 * 1500 byte PDU -> 10.4 frames
1984 	 *   46 byte PDU -> 190.4 frames
1985 	 *
1986 	 */
1987 	defer = 128000;
1988 
1989 	if ((sc->sc_quirk & RTKQ_IM_HW) != 0) {
1990 		period = 1;
1991 		defer = 0;
1992 	} else if ((sc->sc_quirk & RTKQ_PCIE) != 0) {
1993 		period = 8;
1994 	} else {
1995 		switch (CSR_READ_1(sc, RTK_CFG2_BUSFREQ) & 0x7) {
1996 		case RTK_BUSFREQ_33MHZ:
1997 			period = 30;
1998 			break;
1999 		case RTK_BUSFREQ_66MHZ:
2000 			period = 15;
2001 			break;
2002 		default:
2003 			/* lowest possible clock */
2004 			period = 60;
2005 			break;
2006 		}
2007 	}
2008 
2009 	/* The timer interrupt register address differs between chips. */
2010 	uint16_t re8139_reg;
2011 	if ((sc->sc_quirk & RTKQ_8139CPLUS) != 0)
2012 		re8139_reg = RTK_TIMERINT;
2013 	else
2014 		re8139_reg = RTK_TIMERINT_8169;
2015 	CSR_WRITE_4(sc, re8139_reg, defer / period);
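	/*
	 * Worked example: on a 33MHz PCI bus the tick period is 30ns,
	 * so the register is loaded with 128000 / 30 = 4266 ticks and
	 * the 'TX done' interrupt is deferred by roughly 128us. With
	 * hardware interrupt moderation (RTKQ_IM_HW), defer is 0 and
	 * the countdown mechanism is effectively unused.
	 */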
2016 
2017 	if ((sc->sc_quirk & RTKQ_8139CPLUS) == 0) {
2018 		/*
2019 		 * For 8169 gigE NICs, set the max allowed RX packet
2020 		 * size so we can receive jumbo frames.
2021 		 */
2022 		CSR_WRITE_2(sc, RTK_MAXRXPKTLEN, 16383);
2023 	}
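	/*
	 * 16383 (0x3FFF) looks like the all-ones value of a 14-bit
	 * length field, so the chip-side limit is as permissive as
	 * possible and the MTU check in re_ioctl() is what actually
	 * bounds frame size.
	 */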
2024 
2025 	if (sc->re_testmode)
2026 		return 0;
2027 
2028 	CSR_WRITE_1(sc, RTK_CFG1, RTK_CFG1_DRVLOAD);
2029 
2030 	ifp->if_flags |= IFF_RUNNING;
2031 	ifp->if_flags &= ~IFF_OACTIVE;
2032 
2033 	callout_schedule(&sc->rtk_tick_ch, hz);
2034 
2035  out:
2036 	if (error) {
2037 		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2038 		ifp->if_timer = 0;
2039 		printf("%s: interface not running\n",
2040 		    device_xname(sc->sc_dev));
2041 	}
2042 
2043 	return error;
2044 }
2045 
2046 static int
2047 re_ioctl(struct ifnet *ifp, u_long command, void *data)
2048 {
2049 	struct rtk_softc *sc = ifp->if_softc;
2050 	struct ifreq *ifr = data;
2051 	int s, error = 0;
2052 
2053 	s = splnet();
2054 
2055 	switch (command) {
2056 	case SIOCSIFMTU:
2057 		/*
2058 		 * Reject jumbo MTUs if the chip does not support jumbo frames.
2059 		 */
2060 		if ((sc->sc_quirk & RTKQ_NOJUMBO) != 0 &&
2061 		    ifr->ifr_mtu > ETHERMTU) {
2062 			error = EINVAL;
2063 			break;
2064 		}
2065 
2066 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ETHERMTU_JUMBO)
2067 			error = EINVAL;
2068 		else if ((error = ifioctl_common(ifp, command, data)) ==
2069 		    ENETRESET)
2070 			error = 0;
2071 		break;
2072 	default:
2073 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
2074 			break;
2075 
2076 		error = 0;
2077 
2078 		if (command == SIOCSIFCAP)
2079 			error = (*ifp->if_init)(ifp);
2080 		else if (command != SIOCADDMULTI && command != SIOCDELMULTI)
2081 			;
2082 		else if (ifp->if_flags & IFF_RUNNING)
2083 			rtk_setmulti(sc);
2084 		break;
2085 	}
2086 
2087 	splx(s);
2088 
2089 	return error;
2090 }
2091 
2092 static void
2093 re_watchdog(struct ifnet *ifp)
2094 {
2095 	struct rtk_softc *sc;
2096 	int s;
2097 
2098 	sc = ifp->if_softc;
2099 	s = splnet();
2100 	printf("%s: watchdog timeout\n", device_xname(sc->sc_dev));
2101 	if_statinc(ifp, if_oerrors);
2102 
2103 	re_txeof(sc);
2104 	re_rxeof(sc);
2105 
2106 	re_init(ifp);
2107 
2108 	splx(s);
2109 }
2110 
2111 /*
2112  * Stop the adapter and free any mbufs allocated to the
2113  * RX and TX lists.
2114  */
2115 static void
2116 re_stop(struct ifnet *ifp, int disable)
2117 {
2118 	int i;
2119 	struct rtk_softc *sc = ifp->if_softc;
2120 
2121 	callout_stop(&sc->rtk_tick_ch);
2122 
2123 	mii_down(&sc->mii);
2124 
2125 	if ((sc->sc_quirk & RTKQ_CMDSTOP) != 0)
2126 		CSR_WRITE_1(sc, RTK_COMMAND, RTK_CMD_STOPREQ | RTK_CMD_TX_ENB |
2127 		    RTK_CMD_RX_ENB);
2128 	else
2129 		CSR_WRITE_1(sc, RTK_COMMAND, 0x00);
2130 	DELAY(1000);
2131 	CSR_WRITE_2(sc, RTK_IMR, 0x0000);
2132 	CSR_WRITE_2(sc, RTK_ISR, 0xFFFF);
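	/*
	 * Mask every interrupt source, then acknowledge anything
	 * still latched; the ISR bits are write-one-to-clear, so
	 * writing 0xFFFF clears all pending conditions.
	 */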
2133 
2134 	if (sc->re_head != NULL) {
2135 		m_freem(sc->re_head);
2136 		sc->re_head = sc->re_tail = NULL;
2137 	}
2138 
2139 	/* Free the TX list buffers. */
2140 	for (i = 0; i < RE_TX_QLEN; i++) {
2141 		if (sc->re_ldata.re_txq[i].txq_mbuf != NULL) {
2142 			bus_dmamap_unload(sc->sc_dmat,
2143 			    sc->re_ldata.re_txq[i].txq_dmamap);
2144 			m_freem(sc->re_ldata.re_txq[i].txq_mbuf);
2145 			sc->re_ldata.re_txq[i].txq_mbuf = NULL;
2146 		}
2147 	}
2148 
2149 	/* Free the RX list buffers. */
2150 	for (i = 0; i < RE_RX_DESC_CNT; i++) {
2151 		if (sc->re_ldata.re_rxsoft[i].rxs_mbuf != NULL) {
2152 			bus_dmamap_unload(sc->sc_dmat,
2153 			    sc->re_ldata.re_rxsoft[i].rxs_dmamap);
2154 			m_freem(sc->re_ldata.re_rxsoft[i].rxs_mbuf);
2155 			sc->re_ldata.re_rxsoft[i].rxs_mbuf = NULL;
2156 		}
2157 	}
2158 
2159 	if (disable)
2160 		re_disable(sc);
2161 
2162 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2163 	ifp->if_timer = 0;
2164 }
2165