xref: /openbsd-src/sys/dev/pci/if_vmx.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
/*	$OpenBSD: if_vmx.c,v 1.67 2021/08/09 18:13:09 jan Exp $	*/

/*
 * Copyright (c) 2013 Tsubai Masanari
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/intrmap.h>
#include <sys/kstat.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/toeplitz.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/bus.h>

#include <dev/pci/if_vmxreg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#define VMX_MAX_QUEUES	MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES)

#define NTXDESC 512 /* tx ring size */
#define NTXSEGS 8 /* tx descriptors per packet */
#define NRXDESC 512
#define NTXCOMPDESC NTXDESC
#define NRXCOMPDESC (NRXDESC * 2)	/* ring1 + ring2 */

#define VMXNET3_DRIVER_VERSION 0x00010000

#define VMX_TX_GEN	htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S)
#define VMX_TXC_GEN	htole32(VMXNET3_TXC_GEN_M << VMXNET3_TXC_GEN_S)
#define VMX_RX_GEN	htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
#define VMX_RXC_GEN	htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)

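/*
 * Ring descriptors carry a "generation" bit that flips each time a
 * ring wraps.  A producer hands a fully initialized descriptor to its
 * consumer by writing the bit last, behind a memory barrier, so
 * completion handlers can poll with a test like
 *
 *	if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
 *		break;
 *
 * to find out whether an entry has been written yet.
 */
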
struct vmxnet3_softc;

struct vmxnet3_txring {
	struct mbuf *m[NTXDESC];
	bus_dmamap_t dmap[NTXDESC];
	struct vmxnet3_txdesc *txd;
	u_int32_t gen;
	u_int prod;
	u_int cons;
};

struct vmxnet3_rxring {
	struct vmxnet3_softc *sc;
	struct mbuf *m[NRXDESC];
	bus_dmamap_t dmap[NRXDESC];
	struct mutex mtx;
	struct if_rxring rxr;
	struct timeout refill;
	struct vmxnet3_rxdesc *rxd;
	u_int fill;
	u_int32_t gen;
	u_int8_t rid;
};

struct vmxnet3_comp_ring {
	union {
		struct vmxnet3_txcompdesc *txcd;
		struct vmxnet3_rxcompdesc *rxcd;
	};
	u_int next;
	u_int32_t gen;
};

struct vmxnet3_txqueue {
	struct vmxnet3_softc *sc; /* sigh */
	struct vmxnet3_txring cmd_ring;
	struct vmxnet3_comp_ring comp_ring;
	struct vmxnet3_txq_shared *ts;
	struct ifqueue *ifq;
	struct kstat *txkstat;
} __aligned(64);

struct vmxnet3_rxqueue {
	struct vmxnet3_softc *sc; /* sigh */
	struct vmxnet3_rxring cmd_ring[2];
	struct vmxnet3_comp_ring comp_ring;
	struct vmxnet3_rxq_shared *rs;
	struct ifiqueue *ifiq;
	struct kstat *rxkstat;
} __aligned(64);

struct vmxnet3_queue {
	struct vmxnet3_txqueue tx;
	struct vmxnet3_rxqueue rx;
	struct vmxnet3_softc *sc;
	char intrname[16];
	void *ih;
	int intr;
};

struct vmxnet3_softc {
	struct device sc_dev;
	struct arpcom sc_arpcom;
	struct ifmedia sc_media;

	bus_space_tag_t	sc_iot0;
	bus_space_tag_t	sc_iot1;
	bus_space_handle_t sc_ioh0;
	bus_space_handle_t sc_ioh1;
	bus_dma_tag_t sc_dmat;
	void *sc_ih;

	int sc_nqueues;
	struct vmxnet3_queue *sc_q;
	struct intrmap *sc_intrmap;

	struct vmxnet3_driver_shared *sc_ds;
	u_int8_t *sc_mcast;
	struct vmxnet3_upt1_rss_conf *sc_rss;

#if NKSTAT > 0
	struct rwlock		sc_kstat_lock;
	struct timeval		sc_kstat_updated;
#endif
};

#define JUMBO_LEN (1024 * 9)
#define DMAADDR(map) ((map)->dm_segs[0].ds_addr)

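/*
 * BAR0 carries the fast-path registers (tx/rx doorbells and the
 * per-vector interrupt masks); BAR1 carries the command and
 * configuration registers used everywhere below.
 */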
#define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
#define READ_BAR1(sc, reg) bus_space_read_4((sc)->sc_iot1, (sc)->sc_ioh1, reg)
#define WRITE_BAR0(sc, reg, val) \
	bus_space_write_4((sc)->sc_iot0, (sc)->sc_ioh0, reg, val)
#define WRITE_BAR1(sc, reg, val) \
	bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
#define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)

int vmxnet3_match(struct device *, void *, void *);
void vmxnet3_attach(struct device *, struct device *, void *);
int vmxnet3_dma_init(struct vmxnet3_softc *);
int vmxnet3_alloc_txring(struct vmxnet3_softc *, int, int);
int vmxnet3_alloc_rxring(struct vmxnet3_softc *, int, int);
void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_link_state(struct vmxnet3_softc *);
void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
int vmxnet3_intr(void *);
int vmxnet3_intr_intx(void *);
int vmxnet3_intr_event(void *);
int vmxnet3_intr_queue(void *);
void vmxnet3_evintr(struct vmxnet3_softc *);
void vmxnet3_txintr(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxintr(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_rxfill_tick(void *);
void vmxnet3_rxfill(struct vmxnet3_rxring *);
void vmxnet3_iff(struct vmxnet3_softc *);
void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
void vmxnet3_stop(struct ifnet *);
void vmxnet3_reset(struct vmxnet3_softc *);
int vmxnet3_init(struct vmxnet3_softc *);
int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
void vmxnet3_start(struct ifqueue *);
void vmxnet3_watchdog(struct ifnet *);
void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
int vmxnet3_media_change(struct ifnet *);
void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);

#if NKSTAT > 0
static void	vmx_kstat_init(struct vmxnet3_softc *);
static void	vmx_kstat_txstats(struct vmxnet3_softc *,
		    struct vmxnet3_txqueue *, int);
static void	vmx_kstat_rxstats(struct vmxnet3_softc *,
		    struct vmxnet3_rxqueue *, int);
#endif /* NKSTAT > 0 */

const struct pci_matchid vmx_devices[] = {
	{ PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 }
};

struct cfattach vmx_ca = {
	sizeof(struct vmxnet3_softc), vmxnet3_match, vmxnet3_attach
};

struct cfdriver vmx_cd = {
	NULL, "vmx", DV_IFNET
};

int
vmxnet3_match(struct device *parent, void *match, void *aux)
{
	return (pci_matchbyid(aux, vmx_devices, nitems(vmx_devices)));
}

void
vmxnet3_attach(struct device *parent, struct device *self, void *aux)
{
	struct vmxnet3_softc *sc = (void *)self;
	struct pci_attach_args *pa = aux;
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	pci_intr_handle_t ih;
	const char *intrstr;
	u_int memtype, ver, macl, mach, intrcfg;
	u_char enaddr[ETHER_ADDR_LEN];
	int (*isr)(void *);
	int msix = 0;
	int i;

	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10);
	if (pci_mapreg_map(pa, 0x10, memtype, 0, &sc->sc_iot0, &sc->sc_ioh0,
	    NULL, NULL, 0)) {
		printf(": failed to map BAR0\n");
		return;
	}
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x14);
	if (pci_mapreg_map(pa, 0x14, memtype, 0, &sc->sc_iot1, &sc->sc_ioh1,
	    NULL, NULL, 0)) {
		printf(": failed to map BAR1\n");
		return;
	}

	ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
	if ((ver & 0x1) == 0) {
		printf(": unsupported hardware version 0x%x\n", ver);
		return;
	}
	WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, 1);

	ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
	if ((ver & 0x1) == 0) {
		printf(": incompatible UPT version 0x%x\n", ver);
		return;
	}
	WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1);

	sc->sc_dmat = pa->pa_dmat;

	WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG);
	intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	isr = vmxnet3_intr;
	sc->sc_nqueues = 1;

	switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) {
	case VMXNET3_INTRCFG_TYPE_AUTO:
	case VMXNET3_INTRCFG_TYPE_MSIX:
		msix = pci_intr_msix_count(pa);
		if (msix > 0) {
			if (pci_intr_map_msix(pa, 0, &ih) == 0) {
				msix--; /* are there spares for tx/rx qs? */
				if (msix == 0)
					break;

				isr = vmxnet3_intr_event;
				sc->sc_intrmap = intrmap_create(&sc->sc_dev,
				    msix, VMX_MAX_QUEUES, INTRMAP_POWEROF2);
				sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
			}
			break;
		}

		/* FALLTHROUGH */
	case VMXNET3_INTRCFG_TYPE_MSI:
		if (pci_intr_map_msi(pa, &ih) == 0)
			break;

		/* FALLTHROUGH */
	case VMXNET3_INTRCFG_TYPE_INTX:
		isr = vmxnet3_intr_intx;
		if (pci_intr_map(pa, &ih) == 0)
			break;

		printf(": failed to map interrupt\n");
		return;
	}
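	/*
	 * With MSI-X, vector 0 carries events (and the legacy
	 * single-queue path); each tx/rx queue pair then gets its own
	 * vector, mapped below and advertised to the device via the
	 * intr_idx fields in the queue shared areas.
	 */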
	sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
	    isr, sc, self->dv_xname);
	intrstr = pci_intr_string(pa->pa_pc, ih);
	if (intrstr)
		printf(": %s", intrstr);

	sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q),
	    M_DEVBUF, M_WAITOK|M_ZERO);

	if (sc->sc_intrmap != NULL) {
		for (i = 0; i < sc->sc_nqueues; i++) {
			struct vmxnet3_queue *q;
			int vec;

			q = &sc->sc_q[i];
			vec = i + 1;
			if (pci_intr_map_msix(pa, vec, &ih) != 0) {
				printf(", failed to map interrupt %d\n", vec);
				return;
			}
			snprintf(q->intrname, sizeof(q->intrname), "%s:%d",
			    self->dv_xname, i);
			q->ih = pci_intr_establish_cpu(pa->pa_pc, ih,
			    IPL_NET | IPL_MPSAFE,
			    intrmap_cpu(sc->sc_intrmap, i),
			    vmxnet3_intr_queue, q, q->intrname);

			q->intr = vec;
			q->sc = sc;
		}
	}

	if (vmxnet3_dma_init(sc)) {
		printf(": failed to setup DMA\n");
		return;
	}

	printf(", %d queue%s", sc->sc_nqueues, sc->sc_nqueues > 1 ? "s" : "");

	WRITE_CMD(sc, VMXNET3_CMD_GET_MACL);
	macl = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	enaddr[0] = macl;
	enaddr[1] = macl >> 8;
	enaddr[2] = macl >> 16;
	enaddr[3] = macl >> 24;
	WRITE_CMD(sc, VMXNET3_CMD_GET_MACH);
	mach = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	enaddr[4] = mach;
	enaddr[5] = mach >> 8;

	WRITE_BAR1(sc, VMXNET3_BAR1_MACL, macl);
	WRITE_BAR1(sc, VMXNET3_BAR1_MACH, mach);
	printf(", address %s\n", ether_sprintf(enaddr));

	bcopy(enaddr, sc->sc_arpcom.ac_enaddr, ETHER_ADDR_LEN);
	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
	ifp->if_xflags = IFXF_MPSAFE;
	ifp->if_ioctl = vmxnet3_ioctl;
	ifp->if_qstart = vmxnet3_start;
	ifp->if_watchdog = vmxnet3_watchdog;
	ifp->if_hardmtu = VMXNET3_MAX_MTU;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
#if 0
	if (sc->sc_ds->upt_features & UPT1_F_CSUM)
		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
#endif
	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;

	ifq_set_maxlen(&ifp->if_snd, NTXDESC);

	ifmedia_init(&sc->sc_media, IFM_IMASK, vmxnet3_media_change,
	    vmxnet3_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_AUTO, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp);
	vmxnet3_link_state(sc);

	if_attach_queues(ifp, sc->sc_nqueues);
	if_attach_iqueues(ifp, sc->sc_nqueues);

#if NKSTAT > 0
	vmx_kstat_init(sc);
#endif

	for (i = 0; i < sc->sc_nqueues; i++) {
		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx;
		sc->sc_q[i].tx.ifq = ifp->if_ifqs[i];
		sc->sc_q[i].rx.ifiq = ifp->if_iqs[i];

#if NKSTAT > 0
		vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i);
		vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i);
#endif
	}
}

int
vmxnet3_dma_init(struct vmxnet3_softc *sc)
{
	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;
	bus_addr_t ds_pa, qs_pa, mcast_pa;
	int i, queue, qs_len, intr;
	u_int major, minor, release_code, rev;

	qs_len = sc->sc_nqueues * (sizeof *ts + sizeof *rs);
	ts = vmxnet3_dma_allocmem(sc, qs_len, VMXNET3_DMADESC_ALIGN, &qs_pa);
	if (ts == NULL)
		return -1;
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		sc->sc_q[queue].tx.ts = ts++;
	rs = (void *)ts;
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		sc->sc_q[queue].rx.rs = rs++;

	for (queue = 0; queue < sc->sc_nqueues; queue++) {
		intr = sc->sc_q[queue].intr;

		if (vmxnet3_alloc_txring(sc, queue, intr))
			return -1;
		if (vmxnet3_alloc_rxring(sc, queue, intr))
			return -1;
	}

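	/*
	 * The multicast filter table holds at most 682 addresses, the
	 * largest count that fits in a single 4KB page
	 * (682 * ETHER_ADDR_LEN = 4092 bytes); vmxnet3_iff() falls
	 * back to ALLMULTI once a host exceeds that.
	 */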
	sc->sc_mcast = vmxnet3_dma_allocmem(sc, 682 * ETHER_ADDR_LEN, 32,
	    &mcast_pa);
	if (sc->sc_mcast == NULL)
		return -1;

	ds = vmxnet3_dma_allocmem(sc, sizeof *sc->sc_ds, 8, &ds_pa);
	if (ds == NULL)
		return -1;
	sc->sc_ds = ds;
	ds->magic = VMXNET3_REV1_MAGIC;
	ds->version = VMXNET3_DRIVER_VERSION;

	/*
	 * XXX FreeBSD version uses following values:
	 * (Does the device behavior depend on them?)
	 *
	 * major = __FreeBSD_version / 100000;
	 * minor = (__FreeBSD_version / 1000) % 100;
	 * release_code = (__FreeBSD_version / 100) % 10;
	 * rev = __FreeBSD_version % 100;
	 */
	major = 0;
	minor = 0;
	release_code = 0;
	rev = 0;
#ifdef __LP64__
	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_64BIT;
#else
	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_32BIT;
#endif
	ds->vmxnet3_revision = 1;
	ds->upt_version = 1;
	ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
	ds->driver_data = ~0ULL;
	ds->driver_data_len = 0;
	ds->queue_shared = qs_pa;
	ds->queue_shared_len = qs_len;
	ds->mtu = VMXNET3_MAX_MTU;
	ds->ntxqueue = sc->sc_nqueues;
	ds->nrxqueue = sc->sc_nqueues;
	ds->mcast_table = mcast_pa;
	ds->automask = 1;
	ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0);
	ds->evintr = 0;
	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < ds->nintr; i++)
		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;

	if (sc->sc_nqueues > 1) {
		struct vmxnet3_upt1_rss_conf *rsscfg;
		bus_addr_t rss_pa;

		rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa);
		if (rsscfg == NULL)
			return -1;

		rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
		    UPT1_RSS_HASH_TYPE_IPV4 |
		    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
		    UPT1_RSS_HASH_TYPE_IPV6;
		rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
		rsscfg->hash_key_size = sizeof(rsscfg->hash_key);
		stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key));

		rsscfg->ind_table_size = sizeof(rsscfg->ind_table);
		for (i = 0; i < sizeof(rsscfg->ind_table); i++)
			rsscfg->ind_table[i] = i % sc->sc_nqueues;

		ds->upt_features |= UPT1_F_RSS;
		ds->rss.version = 1;
		ds->rss.len = sizeof(*rsscfg);
		ds->rss.paddr = rss_pa;

		sc->sc_rss = rsscfg;
	}

	WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa);
	WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32);
	return 0;
}

int
vmxnet3_alloc_txring(struct vmxnet3_softc *sc, int queue, int intr)
{
	struct vmxnet3_txqueue *tq = &sc->sc_q[queue].tx;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	bus_addr_t pa, comp_pa;
	int idx;

	ring->txd = vmxnet3_dma_allocmem(sc, NTXDESC * sizeof ring->txd[0],
	    512, &pa);
	if (ring->txd == NULL)
		return -1;
	comp_ring->txcd = vmxnet3_dma_allocmem(sc,
	    NTXCOMPDESC * sizeof comp_ring->txcd[0], 512, &comp_pa);
	if (comp_ring->txcd == NULL)
		return -1;

	for (idx = 0; idx < NTXDESC; idx++) {
		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
		    VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
			return -1;
	}

	ts = tq->ts;
	bzero(ts, sizeof *ts);
	ts->npending = 0;
	ts->intr_threshold = 1;
	ts->cmd_ring = pa;
	ts->cmd_ring_len = NTXDESC;
	ts->comp_ring = comp_pa;
	ts->comp_ring_len = NTXCOMPDESC;
	ts->driver_data = ~0ULL;
	ts->driver_data_len = 0;
	ts->intr_idx = intr;
	ts->stopped = 1;
	ts->error = 0;
	return 0;
}

int
vmxnet3_alloc_rxring(struct vmxnet3_softc *sc, int queue, int intr)
{
	struct vmxnet3_rxqueue *rq = &sc->sc_q[queue].rx;
	struct vmxnet3_rxq_shared *rs;
	struct vmxnet3_rxring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	bus_addr_t pa[2], comp_pa;
	int i, idx;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->rxd = vmxnet3_dma_allocmem(sc,
		    NRXDESC * sizeof ring->rxd[0], 512, &pa[i]);
		if (ring->rxd == NULL)
			return -1;
	}
	comp_ring = &rq->comp_ring;
	comp_ring->rxcd = vmxnet3_dma_allocmem(sc,
	    NRXCOMPDESC * sizeof comp_ring->rxcd[0], 512, &comp_pa);
	if (comp_ring->rxcd == NULL)
		return -1;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->sc = sc;
		ring->rid = i;
		mtx_init(&ring->mtx, IPL_NET);
		timeout_set(&ring->refill, vmxnet3_rxfill_tick, ring);
		for (idx = 0; idx < NRXDESC; idx++) {
			if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, 1,
			    JUMBO_LEN, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
				return -1;
		}
	}

	rs = rq->rs;
	bzero(rs, sizeof *rs);
	rs->cmd_ring[0] = pa[0];
	rs->cmd_ring[1] = pa[1];
	rs->cmd_ring_len[0] = NRXDESC;
	rs->cmd_ring_len[1] = NRXDESC;
	rs->comp_ring = comp_pa;
	rs->comp_ring_len = NRXCOMPDESC;
	rs->driver_data = ~0ULL;
	rs->driver_data_len = 0;
	rs->intr_idx = intr;
	rs->stopped = 1;
	rs->error = 0;
	return 0;
}

void
vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;

	ring->cons = ring->prod = 0;
	ring->gen = VMX_TX_GEN;
	comp_ring->next = 0;
	comp_ring->gen = VMX_TXC_GEN;
	bzero(ring->txd, NTXDESC * sizeof ring->txd[0]);
	bzero(comp_ring->txcd, NTXCOMPDESC * sizeof comp_ring->txcd[0]);
}

void
vmxnet3_rxfill_tick(void *arg)
{
	struct vmxnet3_rxring *ring = arg;

	if (!mtx_enter_try(&ring->mtx))
		return;

	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);
}

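/*
 * Post receive buffers on a command ring.  If the ring ran completely
 * dry and no clusters could be posted, schedule a timeout so the
 * refill is retried once memory frees up.
 */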
void
vmxnet3_rxfill(struct vmxnet3_rxring *ring)
{
	struct vmxnet3_softc *sc = ring->sc;
	struct vmxnet3_rxdesc *rxd;
	struct mbuf *m;
	bus_dmamap_t map;
	u_int slots;
	unsigned int prod;
	uint32_t rgen;
	uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);

	MUTEX_ASSERT_LOCKED(&ring->mtx);

	prod = ring->fill;
	rgen = ring->gen;

	for (slots = if_rxr_get(&ring->rxr, NRXDESC); slots > 0; slots--) {
		KASSERT(ring->m[prod] == NULL);

		m = MCLGETL(NULL, M_DONTWAIT, JUMBO_LEN);
		if (m == NULL)
			break;

		m->m_pkthdr.len = m->m_len = JUMBO_LEN;
		m_adj(m, ETHER_ALIGN);

		map = ring->dmap[prod];
		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
			panic("load mbuf");

		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_PREREAD);

		ring->m[prod] = m;

		rxd = &ring->rxd[prod];
		rxd->rx_addr = htole64(DMAADDR(map));
		membar_producer();
		rxd->rx_word2 = (htole32(m->m_pkthdr.len & VMXNET3_RX_LEN_M) <<
		    VMXNET3_RX_LEN_S) | type | rgen;

		if (++prod == NRXDESC) {
			prod = 0;
			rgen ^= VMX_RX_GEN;
		}
	}
	if_rxr_put(&ring->rxr, slots);

	ring->fill = prod;
	ring->gen = rgen;

	if (if_rxr_inuse(&ring->rxr) == 0)
		timeout_add(&ring->refill, 1);
}

void
vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct vmxnet3_rxring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int i;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->fill = 0;
		ring->gen = VMX_RX_GEN;
		bzero(ring->rxd, NRXDESC * sizeof ring->rxd[0]);
		if_rxr_init(&ring->rxr, 2, NRXDESC - 1);
	}

	/* XXX only fill ring 0 */
	ring = &rq->cmd_ring[0];
	mtx_enter(&ring->mtx);
	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);

	comp_ring = &rq->comp_ring;
	comp_ring->next = 0;
	comp_ring->gen = VMX_RXC_GEN;
	bzero(comp_ring->rxcd, NRXCOMPDESC * sizeof comp_ring->rxcd[0]);
}

void
vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	int idx;

	for (idx = 0; idx < NTXDESC; idx++) {
		if (ring->m[idx]) {
			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
			m_freem(ring->m[idx]);
			ring->m[idx] = NULL;
		}
	}
}

void
vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct vmxnet3_rxring *ring;
	int i, idx;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		timeout_del(&ring->refill);
		for (idx = 0; idx < NRXDESC; idx++) {
			struct mbuf *m = ring->m[idx];
			if (m == NULL)
				continue;

			ring->m[idx] = NULL;
			m_freem(m);
			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
		}
	}
}

void
vmxnet3_link_state(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	u_int x, link, speed;

	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	speed = x >> 16;
	if (x & 1) {
		ifp->if_baudrate = IF_Mbps(speed);
		link = LINK_STATE_UP;
	} else
		link = LINK_STATE_DOWN;

	if (ifp->if_link_state != link) {
		ifp->if_link_state = link;
		if_link_state_change(ifp);
	}
}

static inline void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}

static inline void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}

void
vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
	vmxnet3_enable_intr(sc, 0);
	if (sc->sc_intrmap) {
		for (i = 0; i < sc->sc_nqueues; i++)
			vmxnet3_enable_intr(sc, sc->sc_q[i].intr);
	}
}

void
vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
	vmxnet3_disable_intr(sc, 0);
	if (sc->sc_intrmap) {
		for (i = 0; i < sc->sc_nqueues; i++)
			vmxnet3_disable_intr(sc, sc->sc_q[i].intr);
	}
}

int
vmxnet3_intr_intx(void *arg)
{
	struct vmxnet3_softc *sc = arg;

	if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
		return 0;

	return (vmxnet3_intr(sc));
}

int
vmxnet3_intr(void *arg)
{
	struct vmxnet3_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;

	if (sc->sc_ds->event) {
		KERNEL_LOCK();
		vmxnet3_evintr(sc);
		KERNEL_UNLOCK();
	}

	if (ifp->if_flags & IFF_RUNNING) {
		vmxnet3_rxintr(sc, &sc->sc_q[0].rx);
		vmxnet3_txintr(sc, &sc->sc_q[0].tx);
		vmxnet3_enable_intr(sc, 0);
	}

	return 1;
}

int
vmxnet3_intr_event(void *arg)
{
	struct vmxnet3_softc *sc = arg;

	if (sc->sc_ds->event) {
		KERNEL_LOCK();
		vmxnet3_evintr(sc);
		KERNEL_UNLOCK();
	}

	vmxnet3_enable_intr(sc, 0);
	return 1;
}

int
vmxnet3_intr_queue(void *arg)
{
	struct vmxnet3_queue *q = arg;

	vmxnet3_rxintr(q->sc, &q->rx);
	vmxnet3_txintr(q->sc, &q->tx);
	vmxnet3_enable_intr(q->sc, q->intr);

	return 1;
}

void
vmxnet3_evintr(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	u_int event = sc->sc_ds->event;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;

	/* Clear events. */
	WRITE_BAR1(sc, VMXNET3_BAR1_EVENT, event);

	/* Link state change? */
	if (event & VMXNET3_EVENT_LINK)
		vmxnet3_link_state(sc);

	/* Queue error? */
	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
		WRITE_CMD(sc, VMXNET3_CMD_GET_STATUS);

		ts = sc->sc_q[0].tx.ts;
		if (ts->stopped)
			printf("%s: TX error 0x%x\n", ifp->if_xname, ts->error);
		rs = sc->sc_q[0].rx.rs;
		if (rs->stopped)
			printf("%s: RX error 0x%x\n", ifp->if_xname, rs->error);
		vmxnet3_init(sc);
	}

	if (event & VMXNET3_EVENT_DIC)
		printf("%s: device implementation change event\n",
		    ifp->if_xname);
	if (event & VMXNET3_EVENT_DEBUG)
		printf("%s: debug event\n", ifp->if_xname);
}

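/*
 * Transmit completion.  Each completion-ring entry names, via EOPIDX,
 * the last command descriptor of a completed packet; the mbuf and
 * dmamap are recovered from the matching slot and the consumer index
 * advanced past it.
 */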
void
vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct ifqueue *ifq = tq->ifq;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_txcompdesc *txcd;
	bus_dmamap_t map;
	struct mbuf *m;
	u_int cons, next;
	uint32_t rgen;

	cons = ring->cons;
	if (cons == ring->prod)
		return;

	next = comp_ring->next;
	rgen = comp_ring->gen;

	/* postread */
	for (;;) {
		txcd = &comp_ring->txcd[next];
		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
			break;

		if (++next == NTXCOMPDESC) {
			next = 0;
			rgen ^= VMX_TXC_GEN;
		}

		m = ring->m[cons];
		ring->m[cons] = NULL;

		KASSERT(m != NULL);

		map = ring->dmap[cons];
		bus_dmamap_unload(sc->sc_dmat, map);
		m_freem(m);

		cons = (letoh32(txcd->txc_word0) >> VMXNET3_TXC_EOPIDX_S) &
		    VMXNET3_TXC_EOPIDX_M;
		cons++;
		cons %= NTXDESC;
	}
	/* preread */

	comp_ring->next = next;
	comp_ring->gen = rgen;
	ring->cons = cons;

	if (ifq_is_oactive(ifq))
		ifq_restart(ifq);
}

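/*
 * Receive completion.  The QID field says which command ring a
 * completion belongs to: QIDs below sc_nqueues address ring 1 of the
 * corresponding queue, the rest address ring 2.
 */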
void
vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
	struct vmxnet3_rxring *ring;
	struct vmxnet3_rxcompdesc *rxcd;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	bus_dmamap_t map;
	unsigned int idx, len;
	unsigned int next, rgen;
	unsigned int done = 0;

	next = comp_ring->next;
	rgen = comp_ring->gen;

	for (;;) {
		rxcd = &comp_ring->rxcd[next];
		if ((rxcd->rxc_word3 & VMX_RXC_GEN) != rgen)
			break;

		if (++next == NRXCOMPDESC) {
			next = 0;
			rgen ^= VMX_RXC_GEN;
		}

		idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
		    VMXNET3_RXC_IDX_M);
		if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
		    VMXNET3_RXC_QID_M) < sc->sc_nqueues)
			ring = &rq->cmd_ring[0];
		else
			ring = &rq->cmd_ring[1];

		m = ring->m[idx];
		KASSERT(m != NULL);
		ring->m[idx] = NULL;

		map = ring->dmap[idx];
		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->sc_dmat, map);

		done++;

		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
			ifp->if_ierrors++;
			m_freem(m);
			goto skip_buffer;
		}

		len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
		    VMXNET3_RXC_LEN_M);
		if (len < VMXNET3_MIN_MTU) {
			m_freem(m);
			goto skip_buffer;
		}
		m->m_pkthdr.len = m->m_len = len;

		vmxnet3_rx_csum(rxcd, m);
		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_VLAN)) {
			m->m_flags |= M_VLANTAG;
			m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >>
			    VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M);
		}
		if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
		    VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
			m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1);
			SET(m->m_pkthdr.csum_flags, M_FLOWID);
		}

		ml_enqueue(&ml, m);

skip_buffer:
		if (rq->rs->update_rxhead) {
			u_int qid = letoh32((rxcd->rxc_word0 >>
			    VMXNET3_RXC_QID_S) & VMXNET3_RXC_QID_M);

			idx = (idx + 1) % NRXDESC;
			if (qid < sc->sc_nqueues) {
				WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(qid), idx);
			} else {
				qid -= sc->sc_nqueues;
				WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(qid), idx);
			}
		}
	}

	comp_ring->next = next;
	comp_ring->gen = rgen;

	if (done == 0)
		return;

	ring = &rq->cmd_ring[0];

	if (ifiq_input(rq->ifiq, &ml))
		if_rxr_livelocked(&ring->rxr);

	/* XXX Should we (try to) allocate buffers for ring 2 too? */
	mtx_enter(&ring->mtx);
	if_rxr_put(&ring->rxr, done);
	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);
}

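/*
 * Program the receive filter.  The multicast table lives in DMA
 * memory shared with the device, so it is rewritten in place;
 * SET_FILTER has the device pick up the new table before the new
 * rxmode is committed with SET_RXMODE.
 */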
void
vmxnet3_iff(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	struct arpcom *ac = &sc->sc_arpcom;
	struct vmxnet3_driver_shared *ds = sc->sc_ds;
	struct ether_multi *enm;
	struct ether_multistep step;
	u_int mode;
	u_int8_t *p;

	ds->mcast_tablelen = 0;
	CLR(ifp->if_flags, IFF_ALLMULTI);

	/*
	 * Always accept broadcast frames.
	 * Always accept frames destined to our station address.
	 */
	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;

	if (ISSET(ifp->if_flags, IFF_PROMISC) || ac->ac_multirangecnt > 0 ||
	    ac->ac_multicnt > 682) {
		SET(ifp->if_flags, IFF_ALLMULTI);
		SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
		if (ifp->if_flags & IFF_PROMISC)
			SET(mode, VMXNET3_RXMODE_PROMISC);
	} else {
		p = sc->sc_mcast;
		ETHER_FIRST_MULTI(step, ac, enm);
		while (enm != NULL) {
			bcopy(enm->enm_addrlo, p, ETHER_ADDR_LEN);

			p += ETHER_ADDR_LEN;

			ETHER_NEXT_MULTI(step, enm);
		}

		if (ac->ac_multicnt > 0) {
			SET(mode, VMXNET3_RXMODE_MCAST);
			ds->mcast_tablelen = p - sc->sc_mcast;
		}
	}

	WRITE_CMD(sc, VMXNET3_CMD_SET_FILTER);
	ds->rxmode = mode;
	WRITE_CMD(sc, VMXNET3_CMD_SET_RXMODE);
}

void
vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
{
	if (letoh32(rxcd->rxc_word0 & VMXNET3_RXC_NOCSUM))
		return;

	if ((rxcd->rxc_word3 & (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK)) ==
	    (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK))
		m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;

	if (rxcd->rxc_word3 & VMXNET3_RXC_FRAGMENT)
		return;

	if (rxcd->rxc_word3 & (VMXNET3_RXC_TCP | VMXNET3_RXC_UDP)) {
		if (rxcd->rxc_word3 & VMXNET3_RXC_CSUM_OK)
			m->m_pkthdr.csum_flags |=
			    M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
	}
}

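/*
 * Bring the interface down.  After disabling the device, wait on
 * intr_barrier() for every established handler so no interrupt is
 * still running when the tx and rx rings are torn down.
 */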
void
vmxnet3_stop(struct ifnet *ifp)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	int queue;

	ifp->if_flags &= ~IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);
	ifp->if_timer = 0;

	vmxnet3_disable_all_intrs(sc);

	WRITE_CMD(sc, VMXNET3_CMD_DISABLE);

	if (sc->sc_intrmap != NULL) {
		for (queue = 0; queue < sc->sc_nqueues; queue++)
			intr_barrier(sc->sc_q[queue].ih);
	} else
		intr_barrier(sc->sc_ih);

	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_txstop(sc, &sc->sc_q[queue].tx);
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_rxstop(sc, &sc->sc_q[queue].rx);
}

void
vmxnet3_reset(struct vmxnet3_softc *sc)
{
	WRITE_CMD(sc, VMXNET3_CMD_RESET);
}

int
vmxnet3_init(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	int queue;

	/*
	 * Cancel pending I/O and free all RX/TX buffers.
	 */
	vmxnet3_stop(ifp);

#if 0
	/* Put controller into known state. */
	vmxnet3_reset(sc);
#endif

	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_txinit(sc, &sc->sc_q[queue].tx);
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_rxinit(sc, &sc->sc_q[queue].rx);

	for (queue = 0; queue < sc->sc_nqueues; queue++) {
		WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
		WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
	}

	WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
	if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
		printf("%s: failed to initialize\n", ifp->if_xname);
		vmxnet3_stop(ifp);
		return EIO;
	}

	/* Program promiscuous mode and multicast filters. */
	vmxnet3_iff(sc);

	vmxnet3_enable_all_intrs(sc);

	vmxnet3_link_state(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);

	return 0;
}

static int
vmx_rxr_info(struct vmxnet3_softc *sc, struct if_rxrinfo *ifri)
{
	struct if_rxring_info *ifrs, *ifr;
	int error;
	unsigned int i;

	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
	if (ifrs == NULL)
		return (ENOMEM);

	for (i = 0; i < sc->sc_nqueues; i++) {
		struct if_rxring *rxr = &sc->sc_q[i].rx.cmd_ring[0].rxr;
		ifr = &ifrs[i];

		ifr->ifr_size = JUMBO_LEN;
		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
		ifr->ifr_info = *rxr;
	}

	error = if_rxr_info_ioctl(ifri, i, ifrs);

	free(ifrs, M_TEMP, i * sizeof(*ifrs));

	return (error);
}

int
vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			error = vmxnet3_init(sc);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING)
				error = ENETRESET;
			else
				error = vmxnet3_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vmxnet3_stop(ifp);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFRXR:
		error = vmx_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
		break;
	default:
		error = ether_ioctl(ifp, &sc->sc_arpcom, cmd, data);
	}

	if (error == ENETRESET) {
		if (ifp->if_flags & IFF_RUNNING)
			vmxnet3_iff(sc);
		error = 0;
	}

	splx(s);
	return error;
}

static inline int
vmx_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
{
	int error;

	error = bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
	if (error != EFBIG)
		return (error);

	error = m_defrag(m, M_DONTWAIT);
	if (error != 0)
		return (error);

	return (bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
}

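/*
 * Enqueue packets.  Each mbuf is loaded into up to NTXSEGS
 * descriptors; all of them are written with an inverted generation
 * bit first, and only then is the start-of-packet descriptor's bit
 * flipped (behind membar_producer()) to hand the whole chain to the
 * device in one step.
 */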
void
vmxnet3_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vmxnet3_softc *sc = ifp->if_softc;
	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_txdesc *txd, *sop;
	bus_dmamap_t map;
	unsigned int prod, free, i;
	unsigned int post = 0;
	uint32_t rgen, gen;

	struct mbuf *m;

	free = ring->cons;
	prod = ring->prod;
	if (free <= prod)
		free += NTXDESC;
	free -= prod;

	rgen = ring->gen;

	for (;;) {
		if (free <= NTXSEGS) {
			ifq_set_oactive(ifq);
			break;
		}

		m = ifq_dequeue(ifq);
		if (m == NULL)
			break;

		map = ring->dmap[prod];

		if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) {
			ifq->ifq_errors++;
			m_freem(m);
			continue;
		}

#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		ring->m[prod] = m;

		bus_dmamap_sync(sc->sc_dmat, map, 0,
		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);

		gen = rgen ^ VMX_TX_GEN;
		sop = &ring->txd[prod];
		for (i = 0; i < map->dm_nsegs; i++) {
			txd = &ring->txd[prod];
			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
			    VMXNET3_TX_LEN_S) | gen;
			txd->tx_word3 = 0;

			if (++prod == NTXDESC) {
				prod = 0;
				rgen ^= VMX_TX_GEN;
			}

			gen = rgen;
		}
		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);

		if (ISSET(m->m_flags, M_VLANTAG)) {
			sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
			sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
			    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
		}

		/* Change the ownership by flipping the "generation" bit */
		membar_producer();
		sop->tx_word2 ^= VMX_TX_GEN;

		free -= i;
		post = 1;
	}

	if (!post)
		return;

	ring->prod = prod;
	ring->gen = rgen;

	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), prod);
}

void
vmxnet3_watchdog(struct ifnet *ifp)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	int s;

	printf("%s: device timeout\n", ifp->if_xname);
	s = splnet();
	vmxnet3_init(sc);
	splx(s);
}

void
vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct vmxnet3_softc *sc = ifp->if_softc;

	vmxnet3_link_state(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (ifp->if_link_state != LINK_STATE_UP)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (ifp->if_baudrate >= IF_Gbps(10))
		ifmr->ifm_active |= IFM_10G_T;
}

int
vmxnet3_media_change(struct ifnet *ifp)
{
	return 0;
}

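/*
 * Allocate wired, zeroed DMA memory and return both its kernel VA and
 * bus address.  The memory is allocated as a single contiguous
 * segment, so it stays valid even though the temporary dmamap used to
 * learn the bus address is unloaded and destroyed before returning.
 */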
void *
vmxnet3_dma_allocmem(struct vmxnet3_softc *sc, u_int size, u_int align,
    bus_addr_t *pa)
{
	bus_dma_tag_t t = sc->sc_dmat;
	bus_dma_segment_t segs[1];
	bus_dmamap_t map;
	caddr_t va;
	int n;

	if (bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT))
		return NULL;
	if (bus_dmamem_map(t, segs, 1, size, &va, BUS_DMA_NOWAIT))
		return NULL;
	if (bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map))
		return NULL;
	if (bus_dmamap_load(t, map, va, size, NULL, BUS_DMA_NOWAIT))
		return NULL;
	bzero(va, size);
	*pa = DMAADDR(map);
	bus_dmamap_unload(t, map);
	bus_dmamap_destroy(t, map);
	return va;
}

#if NKSTAT > 0
/*
 * "hardware" counters are exported as separate kstats for each tx
 * and rx ring, but the request for the hypervisor to update the
 * stats is done once at the controller level. we limit the number
 * of updates at the controller level to a rate of one per second to
 * debounce this a bit.
 */
static const struct timeval vmx_kstat_rate = { 1, 0 };

/*
 * all the vmx stats are 64 bit counters, we just need their name and units.
 */
struct vmx_kstat_tpl {
	const char		*name;
	enum kstat_kv_unit	 unit;
};

static const struct vmx_kstat_tpl vmx_rx_kstat_tpl[UPT1_RxStats_count] = {
	{ "LRO packets",	KSTAT_KV_U_PACKETS },
	{ "LRO bytes",		KSTAT_KV_U_BYTES },
	{ "ucast packets",	KSTAT_KV_U_PACKETS },
	{ "ucast bytes",	KSTAT_KV_U_BYTES },
	{ "mcast packets",	KSTAT_KV_U_PACKETS },
	{ "mcast bytes",	KSTAT_KV_U_BYTES },
	{ "bcast packets",	KSTAT_KV_U_PACKETS },
	{ "bcast bytes",	KSTAT_KV_U_BYTES },
	{ "no buffers",		KSTAT_KV_U_PACKETS },
	{ "errors",		KSTAT_KV_U_PACKETS },
};

static const struct vmx_kstat_tpl vmx_tx_kstat_tpl[UPT1_TxStats_count] = {
	{ "TSO packets",	KSTAT_KV_U_PACKETS },
	{ "TSO bytes",		KSTAT_KV_U_BYTES },
	{ "ucast packets",	KSTAT_KV_U_PACKETS },
	{ "ucast bytes",	KSTAT_KV_U_BYTES },
	{ "mcast packets",	KSTAT_KV_U_PACKETS },
	{ "mcast bytes",	KSTAT_KV_U_BYTES },
	{ "bcast packets",	KSTAT_KV_U_PACKETS },
	{ "bcast bytes",	KSTAT_KV_U_BYTES },
	{ "errors",		KSTAT_KV_U_PACKETS },
	{ "discards",		KSTAT_KV_U_PACKETS },
};

static void
vmx_kstat_init(struct vmxnet3_softc *sc)
{
	rw_init(&sc->sc_kstat_lock, "vmxkstat");
}

static int
vmx_kstat_read(struct kstat *ks)
{
	struct vmxnet3_softc *sc = ks->ks_softc;
	struct kstat_kv *kvs = ks->ks_data;
	uint64_t *vs = ks->ks_ptr;
	unsigned int n, i;

	if (ratecheck(&sc->sc_kstat_updated, &vmx_kstat_rate)) {
		WRITE_CMD(sc, VMXNET3_CMD_GET_STATS);
		/* barrier? */
	}

	n = ks->ks_datalen / sizeof(*kvs);
	for (i = 0; i < n; i++)
		kstat_kv_u64(&kvs[i]) = lemtoh64(&vs[i]);

	TIMEVAL_TO_TIMESPEC(&sc->sc_kstat_updated, &ks->ks_updated);

	return (0);
}

static struct kstat *
vmx_kstat_create(struct vmxnet3_softc *sc, const char *name, unsigned int unit,
    const struct vmx_kstat_tpl *tpls, unsigned int n, uint64_t *vs)
{
	struct kstat *ks;
	struct kstat_kv *kvs;
	unsigned int i;

	ks = kstat_create(sc->sc_dev.dv_xname, 0, name, unit,
	    KSTAT_T_KV, 0);
	if (ks == NULL)
		return (NULL);

	kvs = mallocarray(n, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
	for (i = 0; i < n; i++) {
		const struct vmx_kstat_tpl *tpl = &tpls[i];

		kstat_kv_unit_init(&kvs[i], tpl->name,
		    KSTAT_KV_T_COUNTER64, tpl->unit);
	}

	ks->ks_softc = sc;
	kstat_set_wlock(ks, &sc->sc_kstat_lock);
	ks->ks_ptr = vs;
	ks->ks_data = kvs;
	ks->ks_datalen = n * sizeof(*kvs);
	ks->ks_read = vmx_kstat_read;
	TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &ks->ks_interval);

	kstat_install(ks);

	return (ks);
}

static void
vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq,
    int unit)
{
	tq->txkstat = vmx_kstat_create(sc, "vmx-txstats", unit,
	    vmx_tx_kstat_tpl, nitems(vmx_tx_kstat_tpl), tq->ts->stats);
}

static void
vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq,
    int unit)
{
	rq->rxkstat = vmx_kstat_create(sc, "vmx-rxstats", unit,
	    vmx_rx_kstat_tpl, nitems(vmx_rx_kstat_tpl), rq->rs->stats);
}
#endif /* NKSTAT > 0 */