1 /*	$OpenBSD: if_vmx.c,v 1.79 2023/11/10 15:51:24 bluhm Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Tsubai Masanari
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "kstat.h"
21 
22 #include <sys/param.h>
23 #include <sys/device.h>
24 #include <sys/mbuf.h>
25 #include <sys/socket.h>
26 #include <sys/sockio.h>
27 #include <sys/systm.h>
28 #include <sys/atomic.h>
29 #include <sys/intrmap.h>
30 #include <sys/kstat.h>
31 
32 #include <net/bpf.h>
33 #include <net/if.h>
34 #include <net/toeplitz.h>
35 #include <net/if_media.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/if_ether.h>
39 #include <netinet/ip.h>
40 #include <netinet/tcp.h>
41 #include <netinet/udp.h>
42 
43 #include <machine/bus.h>
44 
45 #include <dev/pci/if_vmxreg.h>
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcidevs.h>
48 
49 #define VMX_MAX_QUEUES	MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES)
50 
51 #define NTXDESC 512 /* tx ring size */
52 #define NTXSEGS 8 /* tx descriptors per packet */
53 #define NRXDESC 512
54 #define NTXCOMPDESC NTXDESC
55 #define NRXCOMPDESC (NRXDESC * 2)	/* ring1 + ring2 */
56 
57 #define VMXNET3_DRIVER_VERSION 0x00010000
58 
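/*
 * Each ring descriptor carries a "generation" bit.  The driver keeps a
 * per-ring generation value that is toggled every time the ring wraps;
 * a tx/rx descriptor is handed to the device by writing it with the
 * current generation, and a completion descriptor is valid once the
 * device has written it back with the generation the driver expects.
 */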
59 #define VMX_TX_GEN	htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S)
60 #define VMX_TXC_GEN	htole32(VMXNET3_TXC_GEN_M << VMXNET3_TXC_GEN_S)
61 #define VMX_RX_GEN	htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
62 #define VMX_RXC_GEN	htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)
63 
64 struct vmx_dmamem {
65 	bus_dmamap_t		vdm_map;
66 	bus_dma_segment_t	vdm_seg;
67 	int			vdm_nsegs;
68 	size_t			vdm_size;
69 	caddr_t			vdm_kva;
70 };
71 
72 #define VMX_DMA_MAP(_vdm)	((_vdm)->vdm_map)
73 #define VMX_DMA_DVA(_vdm)	((_vdm)->vdm_map->dm_segs[0].ds_addr)
74 #define VMX_DMA_KVA(_vdm)	((void *)(_vdm)->vdm_kva)
75 #define VMX_DMA_LEN(_vdm)	((_vdm)->vdm_size)
76 
77 struct vmxnet3_softc;
78 
79 struct vmxnet3_txring {
80 	struct vmx_dmamem dmamem;
81 	struct mbuf *m[NTXDESC];
82 	bus_dmamap_t dmap[NTXDESC];
83 	struct vmxnet3_txdesc *txd;
84 	u_int32_t gen;
85 	volatile u_int prod;
86 	volatile u_int cons;
87 };
88 
89 struct vmxnet3_rxring {
90 	struct vmxnet3_softc *sc;
91 	struct vmxnet3_rxq_shared *rs; /* copy of the rxqueue rs */
92 	struct vmx_dmamem dmamem;
93 	struct mbuf *m[NRXDESC];
94 	bus_dmamap_t dmap[NRXDESC];
95 	struct mutex mtx;
96 	struct if_rxring rxr;
97 	struct timeout refill;
98 	struct vmxnet3_rxdesc *rxd;
99 	bus_size_t rxh;
100 	u_int fill;
101 	u_int32_t gen;
102 	u_int8_t rid;
103 };
104 
105 struct vmxnet3_comp_ring {
106 	struct vmx_dmamem dmamem;
107 	union {
108 		struct vmxnet3_txcompdesc *txcd;
109 		struct vmxnet3_rxcompdesc *rxcd;
110 	};
111 	u_int next;
112 	u_int32_t gen;
113 };
114 
115 struct vmxnet3_txqueue {
116 	struct vmxnet3_softc *sc; /* sigh */
117 	struct vmxnet3_txring cmd_ring;
118 	struct vmxnet3_comp_ring comp_ring;
119 	struct vmxnet3_txq_shared *ts;
120 	struct ifqueue *ifq;
121 	struct kstat *txkstat;
122 	unsigned int queue;
123 } __aligned(64);
124 
125 struct vmxnet3_rxqueue {
126 	struct vmxnet3_softc *sc; /* sigh */
127 	struct vmxnet3_rxring cmd_ring[2];
128 	struct vmxnet3_comp_ring comp_ring;
129 	struct vmxnet3_rxq_shared *rs;
130 	struct ifiqueue *ifiq;
131 	struct kstat *rxkstat;
132 } __aligned(64);
133 
134 struct vmxnet3_queue {
135 	struct vmxnet3_txqueue tx;
136 	struct vmxnet3_rxqueue rx;
137 	struct vmxnet3_softc *sc;
138 	char intrname[16];
139 	void *ih;
140 	int intr;
141 };
142 
143 struct vmxnet3_softc {
144 	struct device sc_dev;
145 	struct arpcom sc_arpcom;
146 	struct ifmedia sc_media;
147 
148 	bus_space_tag_t	sc_iot0;
149 	bus_space_tag_t	sc_iot1;
150 	bus_space_handle_t sc_ioh0;
151 	bus_space_handle_t sc_ioh1;
152 	bus_dma_tag_t sc_dmat;
153 	void *sc_ih;
154 
155 	int sc_nqueues;
156 	struct vmxnet3_queue *sc_q;
157 	struct intrmap *sc_intrmap;
158 
159 	struct vmxnet3_driver_shared *sc_ds;
160 	u_int8_t *sc_mcast;
161 	struct vmxnet3_upt1_rss_conf *sc_rss;
162 
163 #if NKSTAT > 0
164 	struct rwlock		sc_kstat_lock;
165 	struct timeval		sc_kstat_updated;
166 #endif
167 };
168 
169 #define JUMBO_LEN (1024 * 9)
170 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
171 
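/*
 * Register access: BAR0 carries the "doorbell" style registers (tx/rx
 * ring head updates, per-vector interrupt masks) while BAR1 carries the
 * command and configuration registers.  Commands are issued by writing
 * to VMXNET3_BAR1_CMD; "get" style commands return their result through
 * a following read of the same register.
 */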
172 #define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
173 #define READ_BAR1(sc, reg) bus_space_read_4((sc)->sc_iot1, (sc)->sc_ioh1, reg)
174 #define WRITE_BAR0(sc, reg, val) \
175 	bus_space_write_4((sc)->sc_iot0, (sc)->sc_ioh0, reg, val)
176 #define WRITE_BAR1(sc, reg, val) \
177 	bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
178 #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
179 
180 int vmxnet3_match(struct device *, void *, void *);
181 void vmxnet3_attach(struct device *, struct device *, void *);
182 int vmxnet3_dma_init(struct vmxnet3_softc *);
183 int vmxnet3_alloc_txring(struct vmxnet3_softc *, int, int);
184 int vmxnet3_alloc_rxring(struct vmxnet3_softc *, int, int);
185 void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
186 void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
187 void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
188 void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
189 void vmxnet3_link_state(struct vmxnet3_softc *);
190 void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
191 void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
192 int vmxnet3_intr(void *);
193 int vmxnet3_intr_intx(void *);
194 int vmxnet3_intr_event(void *);
195 int vmxnet3_intr_queue(void *);
196 void vmxnet3_evintr(struct vmxnet3_softc *);
197 void vmxnet3_txintr(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
198 void vmxnet3_rxintr(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
199 void vmxnet3_rxfill_tick(void *);
200 void vmxnet3_rxfill(struct vmxnet3_rxring *);
201 void vmxnet3_iff(struct vmxnet3_softc *);
202 void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
203 void vmxnet3_stop(struct ifnet *);
204 void vmxnet3_reset(struct vmxnet3_softc *);
205 int vmxnet3_init(struct vmxnet3_softc *);
206 int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
207 void vmxnet3_start(struct ifqueue *);
208 int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct vmxnet3_txring *,
209     struct mbuf **);
210 void vmxnet3_watchdog(struct ifnet *);
211 void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
212 int vmxnet3_media_change(struct ifnet *);
213 void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);
214 
215 static int	vmx_dmamem_alloc(struct vmxnet3_softc *, struct vmx_dmamem *,
216 		    bus_size_t, u_int);
217 #ifdef notyet
218 static void	vmx_dmamem_free(struct vmxnet3_softc *, struct vmx_dmamem *);
219 #endif
220 
221 #if NKSTAT > 0
222 static void	vmx_kstat_init(struct vmxnet3_softc *);
223 static void	vmx_kstat_txstats(struct vmxnet3_softc *,
224 		    struct vmxnet3_txqueue *, int);
225 static void	vmx_kstat_rxstats(struct vmxnet3_softc *,
226 		    struct vmxnet3_rxqueue *, int);
227 #endif /* NKSTAT > 0 */
228 
229 const struct pci_matchid vmx_devices[] = {
230 	{ PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 }
231 };
232 
233 const struct cfattach vmx_ca = {
234 	sizeof(struct vmxnet3_softc), vmxnet3_match, vmxnet3_attach
235 };
236 
237 struct cfdriver vmx_cd = {
238 	NULL, "vmx", DV_IFNET
239 };
240 
241 int
242 vmxnet3_match(struct device *parent, void *match, void *aux)
243 {
244 	return (pci_matchbyid(aux, vmx_devices, nitems(vmx_devices)));
245 }
246 
247 void
248 vmxnet3_attach(struct device *parent, struct device *self, void *aux)
249 {
250 	struct vmxnet3_softc *sc = (void *)self;
251 	struct pci_attach_args *pa = aux;
252 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
253 	pci_intr_handle_t ih;
254 	const char *intrstr;
255 	u_int memtype, ver, macl, mach, intrcfg;
256 	u_char enaddr[ETHER_ADDR_LEN];
257 	int (*isr)(void *);
258 	int msix = 0;
259 	int i;
260 
261 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10);
262 	if (pci_mapreg_map(pa, 0x10, memtype, 0, &sc->sc_iot0, &sc->sc_ioh0,
263 	    NULL, NULL, 0)) {
264 		printf(": failed to map BAR0\n");
265 		return;
266 	}
267 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x14);
268 	if (pci_mapreg_map(pa, 0x14, memtype, 0, &sc->sc_iot1, &sc->sc_ioh1,
269 	    NULL, NULL, 0)) {
270 		printf(": failed to map BAR1\n");
271 		return;
272 	}
273 
274 	ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
275 	if ((ver & 0x1) == 0) {
276 		printf(": unsupported hardware version 0x%x\n", ver);
277 		return;
278 	}
279 	WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, 1);
280 
281 	ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
282 	if ((ver & 0x1) == 0) {
283 		printf(": incompatible UPT version 0x%x\n", ver);
284 		return;
285 	}
286 	WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1);
287 
288 	sc->sc_dmat = pa->pa_dmat;
289 
290 	WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG);
291 	intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD);
292 	isr = vmxnet3_intr;
293 	sc->sc_nqueues = 1;
294 
295 	switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) {
296 	case VMXNET3_INTRCFG_TYPE_AUTO:
297 	case VMXNET3_INTRCFG_TYPE_MSIX:
298 		msix = pci_intr_msix_count(pa);
299 		if (msix > 0) {
300 			if (pci_intr_map_msix(pa, 0, &ih) == 0) {
301 				msix--; /* are there spares for tx/rx qs? */
302 				if (msix == 0)
303 					break;
304 
305 				isr = vmxnet3_intr_event;
306 				sc->sc_intrmap = intrmap_create(&sc->sc_dev,
307 				    msix, VMX_MAX_QUEUES, INTRMAP_POWEROF2);
308 				sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
309 			}
310 			break;
311 		}
312 
313 		/* FALLTHROUGH */
314 	case VMXNET3_INTRCFG_TYPE_MSI:
315 		if (pci_intr_map_msi(pa, &ih) == 0)
316 			break;
317 
318 		/* FALLTHROUGH */
319 	case VMXNET3_INTRCFG_TYPE_INTX:
320 		isr = vmxnet3_intr_intx;
321 		if (pci_intr_map(pa, &ih) == 0)
322 			break;
323 
324 		printf(": failed to map interrupt\n");
325 		return;
326 	}
327 	intrstr = pci_intr_string(pa->pa_pc, ih);
328 	sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
329 	    isr, sc, self->dv_xname);
330 	if (sc->sc_ih == NULL) {
331 		printf(": unable to establish interrupt handler");
332 		if (intrstr != NULL)
333 			printf(" at %s", intrstr);
334 		printf("\n");
335 		return;
336 	}
337 	if (intrstr)
338 		printf(": %s", intrstr);
339 
340 	sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q),
341 	    M_DEVBUF, M_WAITOK|M_ZERO);
342 
343 	if (sc->sc_intrmap != NULL) {
344 		for (i = 0; i < sc->sc_nqueues; i++) {
345 			struct vmxnet3_queue *q;
346 			int vec;
347 
348 			q = &sc->sc_q[i];
349 			vec = i + 1;
350 			if (pci_intr_map_msix(pa, vec, &ih) != 0) {
351 				printf(", failed to map interrupt %d\n", vec);
352 				return;
353 			}
354 			snprintf(q->intrname, sizeof(q->intrname), "%s:%d",
355 			    self->dv_xname, i);
356 			q->ih = pci_intr_establish_cpu(pa->pa_pc, ih,
357 			    IPL_NET | IPL_MPSAFE,
358 			    intrmap_cpu(sc->sc_intrmap, i),
359 			    vmxnet3_intr_queue, q, q->intrname);
360 			if (q->ih == NULL) {
361 				printf(": unable to establish interrupt %d\n",
362 				    vec);
363 				return;
364 			}
365 
366 			q->intr = vec;
367 			q->sc = sc;
368 		}
369 	}
370 
371 	if (vmxnet3_dma_init(sc)) {
372 		printf(": failed to setup DMA\n");
373 		return;
374 	}
375 
376 	printf(", %d queue%s", sc->sc_nqueues, sc->sc_nqueues > 1 ? "s" : "");
377 
378 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACL);
379 	macl = READ_BAR1(sc, VMXNET3_BAR1_CMD);
380 	enaddr[0] = macl;
381 	enaddr[1] = macl >> 8;
382 	enaddr[2] = macl >> 16;
383 	enaddr[3] = macl >> 24;
384 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACH);
385 	mach = READ_BAR1(sc, VMXNET3_BAR1_CMD);
386 	enaddr[4] = mach;
387 	enaddr[5] = mach >> 8;
388 
389 	WRITE_BAR1(sc, VMXNET3_BAR1_MACL, macl);
390 	WRITE_BAR1(sc, VMXNET3_BAR1_MACH, mach);
391 	printf(", address %s\n", ether_sprintf(enaddr));
392 
393 	bcopy(enaddr, sc->sc_arpcom.ac_enaddr, 6);
394 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
395 	ifp->if_softc = sc;
396 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
397 	ifp->if_xflags = IFXF_MPSAFE;
398 	ifp->if_ioctl = vmxnet3_ioctl;
399 	ifp->if_qstart = vmxnet3_start;
400 	ifp->if_watchdog = vmxnet3_watchdog;
401 	ifp->if_hardmtu = VMXNET3_MAX_MTU;
402 	ifp->if_capabilities = IFCAP_VLAN_MTU;
403 #if 0
404 	if (sc->sc_ds->upt_features & UPT1_F_CSUM)
405 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
406 #endif
407 	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
408 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
409 
410 	ifq_init_maxlen(&ifp->if_snd, NTXDESC);
411 
412 	ifmedia_init(&sc->sc_media, IFM_IMASK, vmxnet3_media_change,
413 	    vmxnet3_media_status);
414 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_AUTO, 0, NULL);
415 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T|IFM_FDX, 0, NULL);
416 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T, 0, NULL);
417 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
418 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T, 0, NULL);
419 	ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_AUTO);
420 
421 	if_attach(ifp);
422 	ether_ifattach(ifp);
423 	vmxnet3_link_state(sc);
424 
425 	if_attach_queues(ifp, sc->sc_nqueues);
426 	if_attach_iqueues(ifp, sc->sc_nqueues);
427 
428 #if NKSTAT > 0
429 	vmx_kstat_init(sc);
430 #endif
431 
432 	for (i = 0; i < sc->sc_nqueues; i++) {
433 		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx;
434 		sc->sc_q[i].tx.ifq = ifp->if_ifqs[i];
435 		sc->sc_q[i].rx.ifiq = ifp->if_iqs[i];
436 
437 #if NKSTAT > 0
438 		vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i);
439 		vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i);
440 #endif
441 	}
442 }
443 
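/*
 * Allocate and wire up the memory shared with the device: the per-queue
 * tx/rx shared areas, the descriptor rings, the multicast filter table,
 * the optional RSS configuration and the driver_shared structure whose
 * physical address is handed to the device via the BAR1 DSL/DSH
 * registers.
 */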
444 int
445 vmxnet3_dma_init(struct vmxnet3_softc *sc)
446 {
447 	struct vmxnet3_driver_shared *ds;
448 	struct vmxnet3_txq_shared *ts;
449 	struct vmxnet3_rxq_shared *rs;
450 	bus_addr_t ds_pa, qs_pa, mcast_pa;
451 	int i, queue, qs_len, intr;
452 	u_int major, minor, release_code, rev;
453 
454 	qs_len = sc->sc_nqueues * (sizeof *ts + sizeof *rs);
455 	ts = vmxnet3_dma_allocmem(sc, qs_len, VMXNET3_DMADESC_ALIGN, &qs_pa);
456 	if (ts == NULL)
457 		return -1;
458 	for (queue = 0; queue < sc->sc_nqueues; queue++)
459 		sc->sc_q[queue].tx.ts = ts++;
460 	rs = (void *)ts;
461 	for (queue = 0; queue < sc->sc_nqueues; queue++)
462 		sc->sc_q[queue].rx.rs = rs++;
463 
464 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
465 		intr = sc->sc_q[queue].intr;
466 
467 		if (vmxnet3_alloc_txring(sc, queue, intr))
468 			return -1;
469 		if (vmxnet3_alloc_rxring(sc, queue, intr))
470 			return -1;
471 	}
472 
473 	sc->sc_mcast = vmxnet3_dma_allocmem(sc, 682 * ETHER_ADDR_LEN, 32, &mcast_pa);
474 	if (sc->sc_mcast == NULL)
475 		return -1;
476 
477 	ds = vmxnet3_dma_allocmem(sc, sizeof *sc->sc_ds, 8, &ds_pa);
478 	if (ds == NULL)
479 		return -1;
480 	sc->sc_ds = ds;
481 	ds->magic = VMXNET3_REV1_MAGIC;
482 	ds->version = VMXNET3_DRIVER_VERSION;
483 
484 	/*
485 	 * XXX FreeBSD version uses following values:
486 	 * (Does the device behavior depend on them?)
487 	 *
488 	 * major = __FreeBSD_version / 100000;
489 	 * minor = (__FreeBSD_version / 1000) % 100;
490 	 * release_code = (__FreeBSD_version / 100) % 10;
491 	 * rev = __FreeBSD_version % 100;
492 	 */
493 	major = 0;
494 	minor = 0;
495 	release_code = 0;
496 	rev = 0;
497 #ifdef __LP64__
498 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
499 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_64BIT;
500 #else
501 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
502 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_32BIT;
503 #endif
504 	ds->vmxnet3_revision = 1;
505 	ds->upt_version = 1;
506 	ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
507 	ds->driver_data = ~0ULL;
508 	ds->driver_data_len = 0;
509 	ds->queue_shared = qs_pa;
510 	ds->queue_shared_len = qs_len;
511 	ds->mtu = VMXNET3_MAX_MTU;
512 	ds->ntxqueue = sc->sc_nqueues;
513 	ds->nrxqueue = sc->sc_nqueues;
514 	ds->mcast_table = mcast_pa;
515 	ds->automask = 1;
516 	ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0);
517 	ds->evintr = 0;
518 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
519 	for (i = 0; i < ds->nintr; i++)
520 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
521 
522 	if (sc->sc_nqueues > 1) {
523 		struct vmxnet3_upt1_rss_conf *rsscfg;
524 		bus_addr_t rss_pa;
525 
526 		rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa);
527 
528 		rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
529 		    UPT1_RSS_HASH_TYPE_IPV4 |
530 		    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
531 		    UPT1_RSS_HASH_TYPE_IPV6;
532 		rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
533 		rsscfg->hash_key_size = sizeof(rsscfg->hash_key);
534 		stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key));
535 
536 		rsscfg->ind_table_size = sizeof(rsscfg->ind_table);
537 		for (i = 0; i < sizeof(rsscfg->ind_table); i++)
538 			rsscfg->ind_table[i] = i % sc->sc_nqueues;
539 
540 		ds->upt_features |= UPT1_F_RSS;
541 		ds->rss.version = 1;
542 		ds->rss.len = sizeof(*rsscfg);
543 		ds->rss.paddr = rss_pa;
544 
545 		sc->sc_rss = rsscfg;
546 	}
547 
548 	WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa);
549 	WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32);
550 	return 0;
551 }
552 
553 int
554 vmxnet3_alloc_txring(struct vmxnet3_softc *sc, int queue, int intr)
555 {
556 	struct vmxnet3_txqueue *tq = &sc->sc_q[queue].tx;
557 	struct vmxnet3_txq_shared *ts;
558 	struct vmxnet3_txring *ring = &tq->cmd_ring;
559 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
560 	int idx;
561 
562 	tq->queue = queue;
563 
564 	if (vmx_dmamem_alloc(sc, &ring->dmamem,
565 	    NTXDESC * sizeof(struct vmxnet3_txdesc), 512) != 0)
566 		return -1;
567 	ring->txd = VMX_DMA_KVA(&ring->dmamem);
568 	if (vmx_dmamem_alloc(sc, &comp_ring->dmamem,
569 	    NTXCOMPDESC * sizeof(comp_ring->txcd[0]), 512) != 0)
570 		return -1;
571 	comp_ring->txcd = VMX_DMA_KVA(&comp_ring->dmamem);
572 
573 	for (idx = 0; idx < NTXDESC; idx++) {
574 		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
575 		    VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
576 			return -1;
577 	}
578 
579 	ts = tq->ts;
580 	bzero(ts, sizeof *ts);
581 	ts->npending = 0;
582 	ts->intr_threshold = 1;
583 	ts->cmd_ring = VMX_DMA_DVA(&ring->dmamem);
584 	ts->cmd_ring_len = NTXDESC;
585 	ts->comp_ring = VMX_DMA_DVA(&comp_ring->dmamem);
586 	ts->comp_ring_len = NTXCOMPDESC;
587 	ts->driver_data = ~0ULL;
588 	ts->driver_data_len = 0;
589 	ts->intr_idx = intr;
590 	ts->stopped = 1;
591 	ts->error = 0;
592 	return 0;
593 }
594 
595 int
596 vmxnet3_alloc_rxring(struct vmxnet3_softc *sc, int queue, int intr)
597 {
598 	struct vmxnet3_rxqueue *rq = &sc->sc_q[queue].rx;
599 	struct vmxnet3_rxq_shared *rs;
600 	struct vmxnet3_rxring *ring;
601 	struct vmxnet3_comp_ring *comp_ring;
602 	int i, idx;
603 
604 	for (i = 0; i < 2; i++) {
605 		ring = &rq->cmd_ring[i];
606 		if (vmx_dmamem_alloc(sc, &ring->dmamem,
607 		    NRXDESC * sizeof(struct vmxnet3_rxdesc), 512) != 0)
608 			return -1;
609 		ring->rxd = VMX_DMA_KVA(&ring->dmamem);
610 	}
611 	comp_ring = &rq->comp_ring;
612 	if (vmx_dmamem_alloc(sc, &comp_ring->dmamem,
613 	    NRXCOMPDESC * sizeof(comp_ring->rxcd[0]), 512) != 0)
614 		return -1;
615 	comp_ring->rxcd = VMX_DMA_KVA(&comp_ring->dmamem);
616 
617 	for (i = 0; i < 2; i++) {
618 		ring = &rq->cmd_ring[i];
619 		ring->sc = sc;
620 		ring->rid = i;
621 		mtx_init(&ring->mtx, IPL_NET);
622 		timeout_set(&ring->refill, vmxnet3_rxfill_tick, ring);
623 		for (idx = 0; idx < NRXDESC; idx++) {
624 			if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, 1,
625 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
626 				return -1;
627 		}
628 
629 		ring->rs = rq->rs;
630 		ring->rxh = (i == 0) ?
631 		    VMXNET3_BAR0_RXH1(queue) : VMXNET3_BAR0_RXH2(queue);
632 	}
633 
634 	rs = rq->rs;
635 	bzero(rs, sizeof *rs);
636 	rs->cmd_ring[0] = VMX_DMA_DVA(&rq->cmd_ring[0].dmamem);
637 	rs->cmd_ring[1] = VMX_DMA_DVA(&rq->cmd_ring[1].dmamem);
638 	rs->cmd_ring_len[0] = NRXDESC;
639 	rs->cmd_ring_len[1] = NRXDESC;
640 	rs->comp_ring = VMX_DMA_DVA(&comp_ring->dmamem);
641 	rs->comp_ring_len = NRXCOMPDESC;
642 	rs->driver_data = ~0ULL;
643 	rs->driver_data_len = 0;
644 	rs->intr_idx = intr;
645 	rs->stopped = 1;
646 	rs->error = 0;
647 	return 0;
648 }
649 
650 void
651 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
652 {
653 	struct vmxnet3_txring *ring = &tq->cmd_ring;
654 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
655 
656 	ring->cons = ring->prod = 0;
657 	ring->gen = VMX_TX_GEN;
658 	comp_ring->next = 0;
659 	comp_ring->gen = VMX_TXC_GEN;
660 	memset(VMX_DMA_KVA(&ring->dmamem), 0,
661 	    VMX_DMA_LEN(&ring->dmamem));
662 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
663 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
664 	memset(VMX_DMA_KVA(&comp_ring->dmamem), 0,
665 	    VMX_DMA_LEN(&comp_ring->dmamem));
666 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
667 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
668 
669 	ifq_clr_oactive(tq->ifq);
670 }
671 
672 void
673 vmxnet3_rxfill_tick(void *arg)
674 {
675 	struct vmxnet3_rxring *ring = arg;
676 
677 	if (!mtx_enter_try(&ring->mtx))
678 		return;
679 
680 	vmxnet3_rxfill(ring);
681 	mtx_leave(&ring->mtx);
682 }
683 
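/*
 * Fill free slots of an rx ring with freshly allocated mbuf clusters.
 * Called with ring->mtx held.  Unused slots are returned to the rxr
 * accounting, a refill timeout is scheduled if no buffers could be
 * posted at all, and the rx head register is updated when the device
 * asks for it via update_rxhead.
 */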
684 void
685 vmxnet3_rxfill(struct vmxnet3_rxring *ring)
686 {
687 	struct vmxnet3_softc *sc = ring->sc;
688 	struct vmxnet3_rxdesc *rxd;
689 	struct mbuf *m;
690 	bus_dmamap_t map;
691 	u_int slots;
692 	unsigned int prod;
693 	uint32_t rgen;
694 	uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);
695 
696 	MUTEX_ASSERT_LOCKED(&ring->mtx);
697 
698 	slots = if_rxr_get(&ring->rxr, NRXDESC);
699 	if (slots == 0)
700 		return;
701 
702 	prod = ring->fill;
703 	rgen = ring->gen;
704 
705 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
706 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
707 
708 	do {
709 		KASSERT(ring->m[prod] == NULL);
710 
711 		m = MCLGETL(NULL, M_DONTWAIT, JUMBO_LEN);
712 		if (m == NULL)
713 			break;
714 
715 		m->m_pkthdr.len = m->m_len = JUMBO_LEN;
716 		m_adj(m, ETHER_ALIGN);
717 
718 		map = ring->dmap[prod];
719 		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
720 			panic("load mbuf");
721 
722 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
723 		    BUS_DMASYNC_PREREAD);
724 
725 		ring->m[prod] = m;
726 
727 		rxd = &ring->rxd[prod];
728 		rxd->rx_addr = htole64(DMAADDR(map));
729 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
730 		    0, VMX_DMA_LEN(&ring->dmamem),
731 		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTWRITE);
732 		rxd->rx_word2 = (htole32(m->m_pkthdr.len & VMXNET3_RX_LEN_M) <<
733 		    VMXNET3_RX_LEN_S) | type | rgen;
734 
735 		if (++prod == NRXDESC) {
736 			prod = 0;
737 			rgen ^= VMX_RX_GEN;
738 		}
739 	} while (--slots > 0);
740 
741 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
742 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
743 
744 	if_rxr_put(&ring->rxr, slots);
745 
746 	ring->fill = prod;
747 	ring->gen = rgen;
748 
749 	if (if_rxr_inuse(&ring->rxr) == 0)
750 		timeout_add(&ring->refill, 1);
751 
752 	if (ring->rs->update_rxhead)
753 		WRITE_BAR0(sc, ring->rxh, prod);
754 }
755 
756 void
757 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
758 {
759 	struct vmxnet3_rxring *ring;
760 	struct vmxnet3_comp_ring *comp_ring;
761 	int i;
762 
763 	for (i = 0; i < 2; i++) {
764 		ring = &rq->cmd_ring[i];
765 		if_rxr_init(&ring->rxr, 2, NRXDESC - 1);
766 		ring->fill = 0;
767 		ring->gen = VMX_RX_GEN;
768 
769 		memset(VMX_DMA_KVA(&ring->dmamem), 0,
770 		    VMX_DMA_LEN(&ring->dmamem));
771 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
772 		    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
773 	}
774 
775 	/* XXX only fill ring 0 */
776 	ring = &rq->cmd_ring[0];
777 	mtx_enter(&ring->mtx);
778 	vmxnet3_rxfill(ring);
779 	mtx_leave(&ring->mtx);
780 
781 	comp_ring = &rq->comp_ring;
782 	comp_ring->next = 0;
783 	comp_ring->gen = VMX_RXC_GEN;
784 
785 	memset(VMX_DMA_KVA(&comp_ring->dmamem), 0,
786 	    VMX_DMA_LEN(&comp_ring->dmamem));
787 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
788 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
789 }
790 
791 void
792 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
793 {
794 	struct vmxnet3_txring *ring = &tq->cmd_ring;
795 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
796 	struct ifqueue *ifq = tq->ifq;
797 	int idx;
798 
799 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
800 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
801 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
802 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
803 
804 	for (idx = 0; idx < NTXDESC; idx++) {
805 		if (ring->m[idx]) {
806 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
807 			m_freem(ring->m[idx]);
808 			ring->m[idx] = NULL;
809 		}
810 	}
811 
812 	ifq_purge(ifq);
813 	ifq_clr_oactive(ifq);
814 }
815 
816 void
817 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
818 {
819 	struct vmxnet3_rxring *ring;
820 	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
821 	int i, idx;
822 
823 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
824 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
825 
826 	for (i = 0; i < 2; i++) {
827 		ring = &rq->cmd_ring[i];
828 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
829 		    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
830 		timeout_del(&ring->refill);
831 		for (idx = 0; idx < NRXDESC; idx++) {
832 			struct mbuf *m = ring->m[idx];
833 			if (m == NULL)
834 				continue;
835 
836 			ring->m[idx] = NULL;
837 			m_freem(m);
838 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
839 		}
840 	}
841 }
842 
843 void
844 vmxnet3_link_state(struct vmxnet3_softc *sc)
845 {
846 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
847 	u_int x, link, speed;
848 
849 	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
850 	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
851 	speed = x >> 16;
852 	if (x & 1) {
853 		ifp->if_baudrate = IF_Mbps(speed);
854 		link = LINK_STATE_UP;
855 	} else
856 		link = LINK_STATE_DOWN;
857 
858 	if (ifp->if_link_state != link) {
859 		ifp->if_link_state = link;
860 		if_link_state_change(ifp);
861 	}
862 }
863 
864 static inline void
865 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
866 {
867 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 0);
868 }
869 
870 static inline void
871 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
872 {
873 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 1);
874 }
875 
876 void
877 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
878 {
879 	int i;
880 
881 	sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
882 	vmxnet3_enable_intr(sc, 0);
883 	if (sc->sc_intrmap) {
884 		for (i = 0; i < sc->sc_nqueues; i++)
885 			vmxnet3_enable_intr(sc, sc->sc_q[i].intr);
886 	}
887 }
888 
889 void
890 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
891 {
892 	int i;
893 
894 	sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
895 	vmxnet3_disable_intr(sc, 0);
896 	if (sc->sc_intrmap) {
897 		for (i = 0; i < sc->sc_nqueues; i++)
898 			vmxnet3_disable_intr(sc, sc->sc_q[i].intr);
899 	}
900 }
901 
902 int
903 vmxnet3_intr_intx(void *arg)
904 {
905 	struct vmxnet3_softc *sc = arg;
906 
907 	if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
908 		return 0;
909 
910 	return (vmxnet3_intr(sc));
911 }
912 
913 int
914 vmxnet3_intr(void *arg)
915 {
916 	struct vmxnet3_softc *sc = arg;
917 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
918 
919 	if (sc->sc_ds->event) {
920 		KERNEL_LOCK();
921 		vmxnet3_evintr(sc);
922 		KERNEL_UNLOCK();
923 	}
924 
925 	if (ifp->if_flags & IFF_RUNNING) {
926 		vmxnet3_rxintr(sc, &sc->sc_q[0].rx);
927 		vmxnet3_txintr(sc, &sc->sc_q[0].tx);
928 		vmxnet3_enable_intr(sc, 0);
929 	}
930 
931 	return 1;
932 }
933 
934 int
935 vmxnet3_intr_event(void *arg)
936 {
937 	struct vmxnet3_softc *sc = arg;
938 
939 	if (sc->sc_ds->event) {
940 		KERNEL_LOCK();
941 		vmxnet3_evintr(sc);
942 		KERNEL_UNLOCK();
943 	}
944 
945 	vmxnet3_enable_intr(sc, 0);
946 	return 1;
947 }
948 
949 int
950 vmxnet3_intr_queue(void *arg)
951 {
952 	struct vmxnet3_queue *q = arg;
953 
954 	vmxnet3_rxintr(q->sc, &q->rx);
955 	vmxnet3_txintr(q->sc, &q->tx);
956 	vmxnet3_enable_intr(q->sc, q->intr);
957 
958 	return 1;
959 }
960 
961 void
962 vmxnet3_evintr(struct vmxnet3_softc *sc)
963 {
964 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
965 	u_int event = sc->sc_ds->event;
966 	struct vmxnet3_txq_shared *ts;
967 	struct vmxnet3_rxq_shared *rs;
968 
969 	/* Clear events. */
970 	WRITE_BAR1(sc, VMXNET3_BAR1_EVENT, event);
971 
972 	/* Link state change? */
973 	if (event & VMXNET3_EVENT_LINK)
974 		vmxnet3_link_state(sc);
975 
976 	/* Queue error? */
977 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
978 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATUS);
979 
980 		ts = sc->sc_q[0].tx.ts;
981 		if (ts->stopped)
982 			printf("%s: TX error 0x%x\n", ifp->if_xname, ts->error);
983 		rs = sc->sc_q[0].rx.rs;
984 		if (rs->stopped)
985 			printf("%s: RX error 0x%x\n", ifp->if_xname, rs->error);
986 		vmxnet3_init(sc);
987 	}
988 
989 	if (event & VMXNET3_EVENT_DIC)
990 		printf("%s: device implementation change event\n",
991 		    ifp->if_xname);
992 	if (event & VMXNET3_EVENT_DEBUG)
993 		printf("%s: debug event\n", ifp->if_xname);
994 }
995 
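/*
 * Process the tx completion ring: for every completion descriptor that
 * carries the expected generation, free the transmitted mbuf and move
 * the consumer index just past the end-of-packet index reported by the
 * device, then restart the ifq if it had been marked oactive.
 */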
996 void
997 vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
998 {
999 	struct ifqueue *ifq = tq->ifq;
1000 	struct vmxnet3_txring *ring = &tq->cmd_ring;
1001 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
1002 	struct vmxnet3_txcompdesc *txcd;
1003 	bus_dmamap_t map;
1004 	struct mbuf *m;
1005 	u_int prod, cons, next;
1006 	uint32_t rgen;
1007 
1008 	prod = ring->prod;
1009 	cons = ring->cons;
1010 
1011 	if (cons == prod)
1012 		return;
1013 
1014 	next = comp_ring->next;
1015 	rgen = comp_ring->gen;
1016 
1017 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1018 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
1019 
1020 	do {
1021 		txcd = &comp_ring->txcd[next];
1022 		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
1023 			break;
1024 
1025 		if (++next == NTXCOMPDESC) {
1026 			next = 0;
1027 			rgen ^= VMX_TXC_GEN;
1028 		}
1029 
1030 		m = ring->m[cons];
1031 		ring->m[cons] = NULL;
1032 
1033 		KASSERT(m != NULL);
1034 
1035 		map = ring->dmap[cons];
1036 		bus_dmamap_unload(sc->sc_dmat, map);
1037 		m_freem(m);
1038 
1039 		cons = (letoh32(txcd->txc_word0) >> VMXNET3_TXC_EOPIDX_S) &
1040 		    VMXNET3_TXC_EOPIDX_M;
1041 		cons++;
1042 		cons %= NTXDESC;
1043 	} while (cons != prod);
1044 
1045 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1046 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
1047 
1048 	comp_ring->next = next;
1049 	comp_ring->gen = rgen;
1050 	ring->cons = cons;
1051 
1052 	if (ifq_is_oactive(ifq))
1053 		ifq_restart(ifq);
1054 }
1055 
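/*
 * Process the rx completion ring: for every completion descriptor that
 * carries the expected generation, look up the filled mbuf (the QID
 * field selects command ring 1 or 2), record checksum/VLAN/RSS
 * information and queue it for the stack, then put the consumed slots
 * back and refill command ring 1.
 */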
1056 void
1057 vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
1058 {
1059 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1060 	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
1061 	struct vmxnet3_rxring *ring;
1062 	struct vmxnet3_rxcompdesc *rxcd;
1063 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
1064 	struct mbuf *m;
1065 	bus_dmamap_t map;
1066 	unsigned int idx, len;
1067 	unsigned int next, rgen;
1068 	unsigned int done = 0;
1069 
1070 	next = comp_ring->next;
1071 	rgen = comp_ring->gen;
1072 
1073 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1074 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
1075 
1076 	for (;;) {
1077 		rxcd = &comp_ring->rxcd[next];
1078 		if ((rxcd->rxc_word3 & VMX_RXC_GEN) != rgen)
1079 			break;
1080 
1081 		if (++next == NRXCOMPDESC) {
1082 			next = 0;
1083 			rgen ^= VMX_RXC_GEN;
1084 		}
1085 
1086 		idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
1087 		    VMXNET3_RXC_IDX_M);
1088 		if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
1089 		    VMXNET3_RXC_QID_M) < sc->sc_nqueues)
1090 			ring = &rq->cmd_ring[0];
1091 		else
1092 			ring = &rq->cmd_ring[1];
1093 
1094 		m = ring->m[idx];
1095 		KASSERT(m != NULL);
1096 		ring->m[idx] = NULL;
1097 
1098 		map = ring->dmap[idx];
1099 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
1100 		    BUS_DMASYNC_POSTREAD);
1101 		bus_dmamap_unload(sc->sc_dmat, map);
1102 
1103 		done++;
1104 
1105 		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
1106 			ifp->if_ierrors++;
1107 			m_freem(m);
1108 			continue;
1109 		}
1110 
1111 		len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
1112 		    VMXNET3_RXC_LEN_M);
1113 		if (len < VMXNET3_MIN_MTU) {
1114 			m_freem(m);
1115 			continue;
1116 		}
1117 		m->m_pkthdr.len = m->m_len = len;
1118 
1119 		vmxnet3_rx_csum(rxcd, m);
1120 		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_VLAN)) {
1121 			m->m_flags |= M_VLANTAG;
1122 			m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >>
1123 			    VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M);
1124 		}
1125 		if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
1126 		    VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
1127 			m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1);
1128 			SET(m->m_pkthdr.csum_flags, M_FLOWID);
1129 		}
1130 
1131 		ml_enqueue(&ml, m);
1132 	}
1133 
1134 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1135 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
1136 
1137 	comp_ring->next = next;
1138 	comp_ring->gen = rgen;
1139 
1140 	if (done == 0)
1141 		return;
1142 
1143 	ring = &rq->cmd_ring[0];
1144 
1145 	if (ifiq_input(rq->ifiq, &ml))
1146 		if_rxr_livelocked(&ring->rxr);
1147 
1148 	/* XXX Should we (try to) allocate buffers for ring 2 too? */
1149 	mtx_enter(&ring->mtx);
1150 	if_rxr_put(&ring->rxr, done);
1151 	vmxnet3_rxfill(ring);
1152 	mtx_leave(&ring->mtx);
1153 }
1154 
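/*
 * Program the receive filter.  The device multicast table allocated in
 * vmxnet3_dma_init() holds up to 682 addresses; with more groups than
 * that, with address ranges, or in promiscuous mode the filter falls
 * back to the ALLMULTI (and PROMISC) rx modes instead.
 */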
1155 void
1156 vmxnet3_iff(struct vmxnet3_softc *sc)
1157 {
1158 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1159 	struct arpcom *ac = &sc->sc_arpcom;
1160 	struct vmxnet3_driver_shared *ds = sc->sc_ds;
1161 	struct ether_multi *enm;
1162 	struct ether_multistep step;
1163 	u_int mode;
1164 	u_int8_t *p;
1165 
1166 	ds->mcast_tablelen = 0;
1167 	CLR(ifp->if_flags, IFF_ALLMULTI);
1168 
1169 	/*
1170 	 * Always accept broadcast frames.
1171 	 * Always accept frames destined to our station address.
1172 	 */
1173 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
1174 
1175 	if (ISSET(ifp->if_flags, IFF_PROMISC) || ac->ac_multirangecnt > 0 ||
1176 	    ac->ac_multicnt > 682) {
1177 		SET(ifp->if_flags, IFF_ALLMULTI);
1178 		SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
1179 		if (ifp->if_flags & IFF_PROMISC)
1180 			SET(mode, VMXNET3_RXMODE_PROMISC);
1181 	} else {
1182 		p = sc->sc_mcast;
1183 		ETHER_FIRST_MULTI(step, ac, enm);
1184 		while (enm != NULL) {
1185 			bcopy(enm->enm_addrlo, p, ETHER_ADDR_LEN);
1186 
1187 			p += ETHER_ADDR_LEN;
1188 
1189 			ETHER_NEXT_MULTI(step, enm);
1190 		}
1191 
1192 		if (ac->ac_multicnt > 0) {
1193 			SET(mode, VMXNET3_RXMODE_MCAST);
1194 			ds->mcast_tablelen = p - sc->sc_mcast;
1195 		}
1196 	}
1197 
1198 	WRITE_CMD(sc, VMXNET3_CMD_SET_FILTER);
1199 	ds->rxmode = mode;
1200 	WRITE_CMD(sc, VMXNET3_CMD_SET_RXMODE);
1201 }
1202 
1203 
1204 void
1205 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1206 {
1207 	if (letoh32(rxcd->rxc_word0 & VMXNET3_RXC_NOCSUM))
1208 		return;
1209 
1210 	if ((rxcd->rxc_word3 & (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK)) ==
1211 	    (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK))
1212 		m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
1213 
1214 	if (rxcd->rxc_word3 & VMXNET3_RXC_FRAGMENT)
1215 		return;
1216 
1217 	if (rxcd->rxc_word3 & (VMXNET3_RXC_TCP | VMXNET3_RXC_UDP)) {
1218 		if (rxcd->rxc_word3 & VMXNET3_RXC_CSUM_OK)
1219 			m->m_pkthdr.csum_flags |=
1220 			    M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
1221 	}
1222 }
1223 
1224 void
1225 vmxnet3_stop(struct ifnet *ifp)
1226 {
1227 	struct vmxnet3_softc *sc = ifp->if_softc;
1228 	int queue;
1229 
1230 	ifp->if_flags &= ~IFF_RUNNING;
1231 	ifp->if_timer = 0;
1232 
1233 	vmxnet3_disable_all_intrs(sc);
1234 
1235 	WRITE_CMD(sc, VMXNET3_CMD_DISABLE);
1236 
1237 	if (sc->sc_intrmap != NULL) {
1238 		for (queue = 0; queue < sc->sc_nqueues; queue++)
1239 			intr_barrier(sc->sc_q[queue].ih);
1240 	} else
1241 		intr_barrier(sc->sc_ih);
1242 
1243 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1244 		vmxnet3_txstop(sc, &sc->sc_q[queue].tx);
1245 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1246 		vmxnet3_rxstop(sc, &sc->sc_q[queue].rx);
1247 }
1248 
1249 void
1250 vmxnet3_reset(struct vmxnet3_softc *sc)
1251 {
1252 	WRITE_CMD(sc, VMXNET3_CMD_RESET);
1253 }
1254 
1255 int
1256 vmxnet3_init(struct vmxnet3_softc *sc)
1257 {
1258 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1259 	int queue;
1260 
1261 	/*
1262 	 * Cancel pending I/O and free all RX/TX buffers.
1263 	 */
1264 	vmxnet3_stop(ifp);
1265 
1266 #if 0
1267 	/* Put controller into known state. */
1268 	vmxnet3_reset(sc);
1269 #endif
1270 
1271 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1272 		vmxnet3_txinit(sc, &sc->sc_q[queue].tx);
1273 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1274 		vmxnet3_rxinit(sc, &sc->sc_q[queue].rx);
1275 
1276 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
1277 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
1278 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
1279 	}
1280 
1281 	WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
1282 	if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
1283 		printf("%s: failed to initialize\n", ifp->if_xname);
1284 		vmxnet3_stop(ifp);
1285 		return EIO;
1286 	}
1287 
1288 	/* Program promiscuous mode and multicast filters. */
1289 	vmxnet3_iff(sc);
1290 
1291 	vmxnet3_enable_all_intrs(sc);
1292 
1293 	vmxnet3_link_state(sc);
1294 
1295 	ifp->if_flags |= IFF_RUNNING;
1296 
1297 	return 0;
1298 }
1299 
1300 static int
1301 vmx_rxr_info(struct vmxnet3_softc *sc, struct if_rxrinfo *ifri)
1302 {
1303 	struct if_rxring_info *ifrs, *ifr;
1304 	int error;
1305 	unsigned int i;
1306 
1307 	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
1308 	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
1309 	if (ifrs == NULL)
1310 		return (ENOMEM);
1311 
1312 	for (i = 0; i < sc->sc_nqueues; i++) {
1313 		struct if_rxring *rxr = &sc->sc_q[i].rx.cmd_ring[0].rxr;
1314 		ifr = &ifrs[i];
1315 
1316 		ifr->ifr_size = JUMBO_LEN;
1317 		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
1318 		ifr->ifr_info = *rxr;
1319 	}
1320 
1321 	error = if_rxr_info_ioctl(ifri, i, ifrs);
1322 
1323 	free(ifrs, M_TEMP, i * sizeof(*ifrs));
1324 
1325 	return (error);
1326 }
1327 
1328 int
1329 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1330 {
1331 	struct vmxnet3_softc *sc = ifp->if_softc;
1332 	struct ifreq *ifr = (struct ifreq *)data;
1333 	int error = 0, s;
1334 
1335 	s = splnet();
1336 
1337 	switch (cmd) {
1338 	case SIOCSIFADDR:
1339 		ifp->if_flags |= IFF_UP;
1340 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1341 			error = vmxnet3_init(sc);
1342 		break;
1343 	case SIOCSIFFLAGS:
1344 		if (ifp->if_flags & IFF_UP) {
1345 			if (ifp->if_flags & IFF_RUNNING)
1346 				error = ENETRESET;
1347 			else
1348 				error = vmxnet3_init(sc);
1349 		} else {
1350 			if (ifp->if_flags & IFF_RUNNING)
1351 				vmxnet3_stop(ifp);
1352 		}
1353 		break;
1354 	case SIOCSIFMEDIA:
1355 	case SIOCGIFMEDIA:
1356 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1357 		break;
1358 	case SIOCGIFRXR:
1359 		error = vmx_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
1360 		break;
1361 	default:
1362 		error = ether_ioctl(ifp, &sc->sc_arpcom, cmd, data);
1363 	}
1364 
1365 	if (error == ENETRESET) {
1366 		if (ifp->if_flags & IFF_RUNNING)
1367 			vmxnet3_iff(sc);
1368 		error = 0;
1369 	}
1370 
1371 	splx(s);
1372 	return error;
1373 }
1374 
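/*
 * Load an outgoing mbuf chain for DMA.  If it needs more segments than
 * the map allows (EFBIG), compact the chain with m_defrag() and retry
 * once.
 */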
1375 static inline int
1376 vmx_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
1377 {
1378 	int error;
1379 
1380 	error = bus_dmamap_load_mbuf(dmat, map, m,
1381 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
1382 	if (error != EFBIG)
1383 		return (error);
1384 
1385 	error = m_defrag(m, M_DONTWAIT);
1386 	if (error != 0)
1387 		return (error);
1388 
1389 	return (bus_dmamap_load_mbuf(dmat, map, m,
1390 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
1391 }
1392 
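/*
 * Transmit start routine.  Each packet's segment descriptors are first
 * written with the inverted generation bit so the device ignores them;
 * only once the whole chain is in place is the generation bit of the
 * start-of-packet descriptor flipped, handing the packet to the device.
 * The tx head register is written at the end to notify the device.
 */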
1393 void
1394 vmxnet3_start(struct ifqueue *ifq)
1395 {
1396 	struct ifnet *ifp = ifq->ifq_if;
1397 	struct vmxnet3_softc *sc = ifp->if_softc;
1398 	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
1399 	struct vmxnet3_txring *ring = &tq->cmd_ring;
1400 	struct vmxnet3_txdesc *txd, *sop;
1401 	bus_dmamap_t map;
1402 	unsigned int prod, free, i;
1403 	unsigned int post = 0;
1404 	uint32_t rgen, gen;
1405 
1406 	struct mbuf *m;
1407 
1408 	free = ring->cons;
1409 	prod = ring->prod;
1410 	if (free <= prod)
1411 		free += NTXDESC;
1412 	free -= prod;
1413 
1414 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1415 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
1416 
1417 	rgen = ring->gen;
1418 
1419 	for (;;) {
1420 		if (free <= NTXSEGS) {
1421 			ifq_set_oactive(ifq);
1422 			break;
1423 		}
1424 
1425 		m = ifq_dequeue(ifq);
1426 		if (m == NULL)
1427 			break;
1428 
1429 		map = ring->dmap[prod];
1430 
1431 		if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) {
1432 			ifq->ifq_errors++;
1433 			m_freem(m);
1434 			continue;
1435 		}
1436 
1437 #if NBPFILTER > 0
1438 		if (ifp->if_bpf)
1439 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1440 #endif
1441 
1442 		ring->m[prod] = m;
1443 
1444 		bus_dmamap_sync(sc->sc_dmat, map, 0,
1445 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
1446 
1447 		gen = rgen ^ VMX_TX_GEN;
1448 		sop = &ring->txd[prod];
1449 		for (i = 0; i < map->dm_nsegs; i++) {
1450 			txd = &ring->txd[prod];
1451 			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
1452 			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
1453 			    VMXNET3_TX_LEN_S) | gen;
1454 			txd->tx_word3 = 0;
1455 
1456 			if (++prod == NTXDESC) {
1457 				prod = 0;
1458 				rgen ^= VMX_TX_GEN;
1459 			}
1460 
1461 			gen = rgen;
1462 		}
1463 		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);
1464 
1465 		if (ISSET(m->m_flags, M_VLANTAG)) {
1466 			sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
1467 			sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
1468 			    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
1469 		}
1470 
1471 		ring->prod = prod;
1472 		/* Change the ownership by flipping the "generation" bit */
1473 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1474 		    0, VMX_DMA_LEN(&ring->dmamem),
1475 		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTWRITE);
1476 		sop->tx_word2 ^= VMX_TX_GEN;
1477 
1478 		free -= i;
1479 		post = 1;
1480 	}
1481 
1482 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1483 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
1484 
1485 	if (!post)
1486 		return;
1487 
1488 	ring->gen = rgen;
1489 
1490 	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(tq->queue), prod);
1491 }
1492 
1493 void
1494 vmxnet3_watchdog(struct ifnet *ifp)
1495 {
1496 	struct vmxnet3_softc *sc = ifp->if_softc;
1497 	int s;
1498 
1499 	printf("%s: device timeout\n", ifp->if_xname);
1500 	s = splnet();
1501 	vmxnet3_init(sc);
1502 	splx(s);
1503 }
1504 
1505 void
1506 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1507 {
1508 	struct vmxnet3_softc *sc = ifp->if_softc;
1509 
1510 	vmxnet3_link_state(sc);
1511 
1512 	ifmr->ifm_status = IFM_AVALID;
1513 	ifmr->ifm_active = IFM_ETHER;
1514 
1515 	if (ifp->if_link_state != LINK_STATE_UP)
1516 		return;
1517 
1518 	ifmr->ifm_status |= IFM_ACTIVE;
1519 
1520 	if (ifp->if_baudrate >= IF_Gbps(10))
1521 		ifmr->ifm_active |= IFM_10G_T;
1522 }
1523 
1524 int
1525 vmxnet3_media_change(struct ifnet *ifp)
1526 {
1527 	return 0;
1528 }
1529 
1530 void *
1531 vmxnet3_dma_allocmem(struct vmxnet3_softc *sc, u_int size, u_int align, bus_addr_t *pa)
1532 {
1533 	bus_dma_tag_t t = sc->sc_dmat;
1534 	bus_dma_segment_t segs[1];
1535 	bus_dmamap_t map;
1536 	caddr_t va;
1537 	int n;
1538 
1539 	if (bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT))
1540 		return NULL;
1541 	if (bus_dmamem_map(t, segs, 1, size, &va, BUS_DMA_NOWAIT))
1542 		return NULL;
1543 	if (bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map))
1544 		return NULL;
1545 	if (bus_dmamap_load(t, map, va, size, NULL, BUS_DMA_NOWAIT))
1546 		return NULL;
1547 	bzero(va, size);
1548 	*pa = DMAADDR(map);
1549 	bus_dmamap_unload(t, map);
1550 	bus_dmamap_destroy(t, map);
1551 	return va;
1552 }
1553 
1554 static int
1555 vmx_dmamem_alloc(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm,
1556     bus_size_t size, u_int align)
1557 {
1558 	vdm->vdm_size = size;
1559 
1560 	if (bus_dmamap_create(sc->sc_dmat, vdm->vdm_size, 1,
1561 	    vdm->vdm_size, 0,
1562 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
1563 	    &vdm->vdm_map) != 0)
1564 		return (1);
1565 	if (bus_dmamem_alloc(sc->sc_dmat, vdm->vdm_size,
1566 	    align, 0, &vdm->vdm_seg, 1, &vdm->vdm_nsegs,
1567 	    BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
1568 		goto destroy;
1569 	if (bus_dmamem_map(sc->sc_dmat, &vdm->vdm_seg, vdm->vdm_nsegs,
1570 	    vdm->vdm_size, &vdm->vdm_kva, BUS_DMA_WAITOK) != 0)
1571 		goto free;
1572 	if (bus_dmamap_load(sc->sc_dmat, vdm->vdm_map, vdm->vdm_kva,
1573 	    vdm->vdm_size, NULL, BUS_DMA_WAITOK) != 0)
1574 		goto unmap;
1575 
1576 	return (0);
1577 unmap:
1578 	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
1579 free:
1580 	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
1581 destroy:
1582 	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
1583 	return (1);
1584 }
1585 
1586 #ifdef notyet
1587 static void
1588 vmx_dmamem_free(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm)
1589 {
1590 	bus_dmamap_unload(sc->sc_dmat, vdm->vdm_map);
1591 	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
1592 	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
1593 	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
1594 }
1595 #endif
1596 
1597 #if NKSTAT > 0
1598 /*
1599  * "hardware" counters are exported as separate kstats for each tx
1600  * and rx ring, but the request for the hypervisor to update the
1601  * stats is done once at the controller level. we limit the number
1602  * of updates at the controller level to a rate of one per second to
1603  * debounce this a bit.
1604  */
1605 static const struct timeval vmx_kstat_rate = { 1, 0 };
1606 
1607 /*
1608  * all the vmx stats are 64-bit counters; we just need their names and units.
1609  */
1610 struct vmx_kstat_tpl {
1611 	const char		*name;
1612 	enum kstat_kv_unit	 unit;
1613 };
1614 
1615 static const struct vmx_kstat_tpl vmx_rx_kstat_tpl[UPT1_RxStats_count] = {
1616 	{ "LRO packets",	KSTAT_KV_U_PACKETS },
1617 	{ "LRO bytes",		KSTAT_KV_U_BYTES },
1618 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1619 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1620 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1621 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1622 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1623 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1624 	{ "no buffers",		KSTAT_KV_U_PACKETS },
1625 	{ "errors",		KSTAT_KV_U_PACKETS },
1626 };
1627 
1628 static const struct vmx_kstat_tpl vmx_tx_kstat_tpl[UPT1_TxStats_count] = {
1629 	{ "TSO packets",	KSTAT_KV_U_PACKETS },
1630 	{ "TSO bytes",		KSTAT_KV_U_BYTES },
1631 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1632 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1633 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1634 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1635 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1636 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1637 	{ "errors",		KSTAT_KV_U_PACKETS },
1638 	{ "discards",		KSTAT_KV_U_PACKETS },
1639 };
1640 
1641 static void
1642 vmx_kstat_init(struct vmxnet3_softc *sc)
1643 {
1644 	rw_init(&sc->sc_kstat_lock, "vmxkstat");
1645 }
1646 
1647 static int
1648 vmx_kstat_read(struct kstat *ks)
1649 {
1650 	struct vmxnet3_softc *sc = ks->ks_softc;
1651 	struct kstat_kv *kvs = ks->ks_data;
1652 	uint64_t *vs = ks->ks_ptr;
1653 	unsigned int n, i;
1654 
1655 	if (ratecheck(&sc->sc_kstat_updated, &vmx_kstat_rate)) {
1656 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATS);
1657 		/* barrier? */
1658 	}
1659 
1660 	n = ks->ks_datalen / sizeof(*kvs);
1661 	for (i = 0; i < n; i++)
1662 		kstat_kv_u64(&kvs[i]) = lemtoh64(&vs[i]);
1663 
1664 	TIMEVAL_TO_TIMESPEC(&sc->sc_kstat_updated, &ks->ks_updated);
1665 
1666 	return (0);
1667 }
1668 
1669 static struct kstat *
1670 vmx_kstat_create(struct vmxnet3_softc *sc, const char *name, unsigned int unit,
1671     const struct vmx_kstat_tpl *tpls, unsigned int n, uint64_t *vs)
1672 {
1673 	struct kstat *ks;
1674 	struct kstat_kv *kvs;
1675 	unsigned int i;
1676 
1677 	ks = kstat_create(sc->sc_dev.dv_xname, 0, name, unit,
1678 	    KSTAT_T_KV, 0);
1679 	if (ks == NULL)
1680 		return (NULL);
1681 
1682 	kvs = mallocarray(n, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
1683 	for (i = 0; i < n; i++) {
1684 		const struct vmx_kstat_tpl *tpl = &tpls[i];
1685 
1686 		kstat_kv_unit_init(&kvs[i], tpl->name,
1687 		    KSTAT_KV_T_COUNTER64, tpl->unit);
1688 	}
1689 
1690 	ks->ks_softc = sc;
1691 	kstat_set_wlock(ks, &sc->sc_kstat_lock);
1692 	ks->ks_ptr = vs;
1693 	ks->ks_data = kvs;
1694 	ks->ks_datalen = n * sizeof(*kvs);
1695 	ks->ks_read = vmx_kstat_read;
1696 	TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &ks->ks_interval);
1697 
1698 	kstat_install(ks);
1699 
1700 	return (ks);
1701 }
1702 
1703 static void
1704 vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq,
1705     int unit)
1706 {
1707 	tq->txkstat = vmx_kstat_create(sc, "vmx-txstats", unit,
1708 	    vmx_tx_kstat_tpl, nitems(vmx_tx_kstat_tpl), tq->ts->stats);
1709 }
1710 
1711 static void
1712 vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq,
1713     int unit)
1714 {
1715 	rq->rxkstat = vmx_kstat_create(sc, "vmx-rxstats", unit,
1716 	    vmx_rx_kstat_tpl, nitems(vmx_rx_kstat_tpl), rq->rs->stats);
1717 }
1718 #endif /* NKSTAT > 0 */
1719