/*	$OpenBSD: if_vmx.c,v 1.70 2022/09/11 08:38:39 yasuoka Exp $	*/

/*
 * Copyright (c) 2013 Tsubai Masanari
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/intrmap.h>
#include <sys/kstat.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/toeplitz.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/bus.h>

#include <dev/pci/if_vmxreg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#define VMX_MAX_QUEUES	MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES)

#define NTXDESC 512 /* tx ring size */
#define NTXSEGS 8 /* tx descriptors per packet */
#define NRXDESC 512
#define NTXCOMPDESC NTXDESC
#define NRXCOMPDESC (NRXDESC * 2)	/* ring1 + ring2 */

#define VMXNET3_DRIVER_VERSION 0x00010000

#define VMX_TX_GEN	htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S)
#define VMX_TXC_GEN	htole32(VMXNET3_TXC_GEN_M << VMXNET3_TXC_GEN_S)
#define VMX_RX_GEN	htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
#define VMX_RXC_GEN	htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)
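
/*
 * Descriptor ownership is tracked with the "generation" bits above: the
 * driver writes a descriptor with the ring's current generation value,
 * and the device only consumes entries whose generation matches its own
 * view of the ring.  Whenever a producer or consumer index wraps past
 * the end of a ring, the corresponding generation value is flipped (see
 * vmxnet3_rxfill(), vmxnet3_start(), vmxnet3_txintr() and
 * vmxnet3_rxintr() below), e.g.:
 *
 *	if (++prod == NTXDESC) {
 *		prod = 0;
 *		rgen ^= VMX_TX_GEN;
 *	}
 */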

struct vmxnet3_softc;

struct vmxnet3_txring {
	struct mbuf *m[NTXDESC];
	bus_dmamap_t dmap[NTXDESC];
	struct vmxnet3_txdesc *txd;
	u_int32_t gen;
	u_int prod;
	u_int cons;
};

struct vmxnet3_rxring {
	struct vmxnet3_softc *sc;
	struct mbuf *m[NRXDESC];
	bus_dmamap_t dmap[NRXDESC];
	struct mutex mtx;
	struct if_rxring rxr;
	struct timeout refill;
	struct vmxnet3_rxdesc *rxd;
	u_int fill;
	u_int32_t gen;
	u_int8_t rid;
};

struct vmxnet3_comp_ring {
	union {
		struct vmxnet3_txcompdesc *txcd;
		struct vmxnet3_rxcompdesc *rxcd;
	};
	u_int next;
	u_int32_t gen;
};

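/*
 * Per-queue tx and rx state.  Both structures are aligned to 64 bytes,
 * presumably so that queue state worked on by different CPUs does not
 * share a cache line.
 */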
struct vmxnet3_txqueue {
	struct vmxnet3_softc *sc; /* sigh */
	struct vmxnet3_txring cmd_ring;
	struct vmxnet3_comp_ring comp_ring;
	struct vmxnet3_txq_shared *ts;
	struct ifqueue *ifq;
	struct kstat *txkstat;
} __aligned(64);

struct vmxnet3_rxqueue {
	struct vmxnet3_softc *sc; /* sigh */
	struct vmxnet3_rxring cmd_ring[2];
	struct vmxnet3_comp_ring comp_ring;
	struct vmxnet3_rxq_shared *rs;
	struct ifiqueue *ifiq;
	struct kstat *rxkstat;
} __aligned(64);

struct vmxnet3_queue {
	struct vmxnet3_txqueue tx;
	struct vmxnet3_rxqueue rx;
	struct vmxnet3_softc *sc;
	char intrname[16];
	void *ih;
	int intr;
};

struct vmxnet3_softc {
	struct device sc_dev;
	struct arpcom sc_arpcom;
	struct ifmedia sc_media;

	bus_space_tag_t	sc_iot0;
	bus_space_tag_t	sc_iot1;
	bus_space_handle_t sc_ioh0;
	bus_space_handle_t sc_ioh1;
	bus_dma_tag_t sc_dmat;
	void *sc_ih;

	int sc_nqueues;
	struct vmxnet3_queue *sc_q;
	struct intrmap *sc_intrmap;

	struct vmxnet3_driver_shared *sc_ds;
	u_int8_t *sc_mcast;
	struct vmxnet3_upt1_rss_conf *sc_rss;

#if NKSTAT > 0
	struct rwlock		sc_kstat_lock;
	struct timeval		sc_kstat_updated;
#endif
};

#define JUMBO_LEN (1024 * 9)
#define DMAADDR(map) ((map)->dm_segs[0].ds_addr)

#define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
#define READ_BAR1(sc, reg) bus_space_read_4((sc)->sc_iot1, (sc)->sc_ioh1, reg)
#define WRITE_BAR0(sc, reg, val) \
	bus_space_write_4((sc)->sc_iot0, (sc)->sc_ioh0, reg, val)
#define WRITE_BAR1(sc, reg, val) \
	bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
#define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
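
/*
 * Register access helpers.  BAR0 holds the registers touched in the hot
 * path (the interrupt masks and the tx/rx head doorbells written through
 * VMXNET3_BAR0_TXH/RXH1/RXH2 below); BAR1 holds the command, version and
 * configuration registers.  A command is issued by writing it to
 * VMXNET3_BAR1_CMD and, where a result is expected, reading the same
 * register back, e.g.:
 *
 *	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
 *	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
 */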

int vmxnet3_match(struct device *, void *, void *);
void vmxnet3_attach(struct device *, struct device *, void *);
int vmxnet3_dma_init(struct vmxnet3_softc *);
int vmxnet3_alloc_txring(struct vmxnet3_softc *, int, int);
int vmxnet3_alloc_rxring(struct vmxnet3_softc *, int, int);
void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_link_state(struct vmxnet3_softc *);
void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
int vmxnet3_intr(void *);
int vmxnet3_intr_intx(void *);
int vmxnet3_intr_event(void *);
int vmxnet3_intr_queue(void *);
void vmxnet3_evintr(struct vmxnet3_softc *);
void vmxnet3_txintr(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
void vmxnet3_rxintr(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
void vmxnet3_rxfill_tick(void *);
void vmxnet3_rxfill(struct vmxnet3_rxring *);
void vmxnet3_iff(struct vmxnet3_softc *);
void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
void vmxnet3_stop(struct ifnet *);
void vmxnet3_reset(struct vmxnet3_softc *);
int vmxnet3_init(struct vmxnet3_softc *);
int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
void vmxnet3_start(struct ifqueue *);
int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct vmxnet3_txring *,
    struct mbuf **);
void vmxnet3_watchdog(struct ifnet *);
void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
int vmxnet3_media_change(struct ifnet *);
void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);

#if NKSTAT > 0
static void	vmx_kstat_init(struct vmxnet3_softc *);
static void	vmx_kstat_txstats(struct vmxnet3_softc *,
		    struct vmxnet3_txqueue *, int);
static void	vmx_kstat_rxstats(struct vmxnet3_softc *,
		    struct vmxnet3_rxqueue *, int);
#endif /* NKSTAT > 0 */

const struct pci_matchid vmx_devices[] = {
	{ PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 }
};

const struct cfattach vmx_ca = {
	sizeof(struct vmxnet3_softc), vmxnet3_match, vmxnet3_attach
};

struct cfdriver vmx_cd = {
	NULL, "vmx", DV_IFNET
};

int
vmxnet3_match(struct device *parent, void *match, void *aux)
{
	return (pci_matchbyid(aux, vmx_devices, nitems(vmx_devices)));
}

void
vmxnet3_attach(struct device *parent, struct device *self, void *aux)
{
	struct vmxnet3_softc *sc = (void *)self;
	struct pci_attach_args *pa = aux;
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	pci_intr_handle_t ih;
	const char *intrstr;
	u_int memtype, ver, macl, mach, intrcfg;
	u_char enaddr[ETHER_ADDR_LEN];
	int (*isr)(void *);
	int msix = 0;
	int i;

	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10);
	if (pci_mapreg_map(pa, 0x10, memtype, 0, &sc->sc_iot0, &sc->sc_ioh0,
	    NULL, NULL, 0)) {
		printf(": failed to map BAR0\n");
		return;
	}
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x14);
	if (pci_mapreg_map(pa, 0x14, memtype, 0, &sc->sc_iot1, &sc->sc_ioh1,
	    NULL, NULL, 0)) {
		printf(": failed to map BAR1\n");
		return;
	}

	ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
	if ((ver & 0x1) == 0) {
		printf(": unsupported hardware version 0x%x\n", ver);
		return;
	}
	WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, 1);

	ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
	if ((ver & 0x1) == 0) {
		printf(": incompatible UPT version 0x%x\n", ver);
		return;
	}
	WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1);

	sc->sc_dmat = pa->pa_dmat;

	WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG);
	intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	isr = vmxnet3_intr;
	sc->sc_nqueues = 1;

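	/*
	 * Pick an interrupt scheme, preferring MSI-X (vector 0 for
	 * events, plus one vector per queue when spare vectors are
	 * available) and falling through to MSI and then INTx.  Only
	 * MSI-X allows more than one queue.
	 */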
	switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) {
	case VMXNET3_INTRCFG_TYPE_AUTO:
	case VMXNET3_INTRCFG_TYPE_MSIX:
		msix = pci_intr_msix_count(pa);
		if (msix > 0) {
			if (pci_intr_map_msix(pa, 0, &ih) == 0) {
				msix--; /* are there spares for tx/rx qs? */
				if (msix == 0)
					break;

				isr = vmxnet3_intr_event;
				sc->sc_intrmap = intrmap_create(&sc->sc_dev,
				    msix, VMX_MAX_QUEUES, INTRMAP_POWEROF2);
				sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
			}
			break;
		}

		/* FALLTHROUGH */
	case VMXNET3_INTRCFG_TYPE_MSI:
		if (pci_intr_map_msi(pa, &ih) == 0)
			break;

		/* FALLTHROUGH */
	case VMXNET3_INTRCFG_TYPE_INTX:
		isr = vmxnet3_intr_intx;
		if (pci_intr_map(pa, &ih) == 0)
			break;

		printf(": failed to map interrupt\n");
		return;
	}
	intrstr = pci_intr_string(pa->pa_pc, ih);
	sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
	    isr, sc, self->dv_xname);
	if (sc->sc_ih == NULL) {
		printf(": unable to establish interrupt handler");
		if (intrstr != NULL)
			printf(" at %s", intrstr);
		printf("\n");
		return;
	}
	if (intrstr)
		printf(": %s", intrstr);

	sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q),
	    M_DEVBUF, M_WAITOK|M_ZERO);

	if (sc->sc_intrmap != NULL) {
		for (i = 0; i < sc->sc_nqueues; i++) {
			struct vmxnet3_queue *q;
			int vec;

			q = &sc->sc_q[i];
			vec = i + 1;
			if (pci_intr_map_msix(pa, vec, &ih) != 0) {
				printf(", failed to map interrupt %d\n", vec);
				return;
			}
			snprintf(q->intrname, sizeof(q->intrname), "%s:%d",
			    self->dv_xname, i);
			q->ih = pci_intr_establish_cpu(pa->pa_pc, ih,
			    IPL_NET | IPL_MPSAFE,
			    intrmap_cpu(sc->sc_intrmap, i),
			    vmxnet3_intr_queue, q, q->intrname);
			if (q->ih == NULL) {
				printf(": unable to establish interrupt %d\n",
				    vec);
				return;
			}

			q->intr = vec;
			q->sc = sc;
		}
	}

	if (vmxnet3_dma_init(sc)) {
		printf(": failed to setup DMA\n");
		return;
	}

	printf(", %d queue%s", sc->sc_nqueues, sc->sc_nqueues > 1 ? "s" : "");

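	/*
	 * The MAC address is handed back in two 32-bit registers: the
	 * low four bytes in MACL and the remaining two in MACH, lowest
	 * address byte in the least significant bits.
	 */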
	WRITE_CMD(sc, VMXNET3_CMD_GET_MACL);
	macl = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	enaddr[0] = macl;
	enaddr[1] = macl >> 8;
	enaddr[2] = macl >> 16;
	enaddr[3] = macl >> 24;
	WRITE_CMD(sc, VMXNET3_CMD_GET_MACH);
	mach = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	enaddr[4] = mach;
	enaddr[5] = mach >> 8;

	WRITE_BAR1(sc, VMXNET3_BAR1_MACL, macl);
	WRITE_BAR1(sc, VMXNET3_BAR1_MACH, mach);
	printf(", address %s\n", ether_sprintf(enaddr));

	bcopy(enaddr, sc->sc_arpcom.ac_enaddr, 6);
	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
	ifp->if_xflags = IFXF_MPSAFE;
	ifp->if_ioctl = vmxnet3_ioctl;
	ifp->if_qstart = vmxnet3_start;
	ifp->if_watchdog = vmxnet3_watchdog;
	ifp->if_hardmtu = VMXNET3_MAX_MTU;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
#if 0
	if (sc->sc_ds->upt_features & UPT1_F_CSUM)
		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
#endif
	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;

	ifq_set_maxlen(&ifp->if_snd, NTXDESC);

	ifmedia_init(&sc->sc_media, IFM_IMASK, vmxnet3_media_change,
	    vmxnet3_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_AUTO, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp);
	vmxnet3_link_state(sc);

	if_attach_queues(ifp, sc->sc_nqueues);
	if_attach_iqueues(ifp, sc->sc_nqueues);

#if NKSTAT > 0
	vmx_kstat_init(sc);
#endif

	for (i = 0; i < sc->sc_nqueues; i++) {
		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx;
		sc->sc_q[i].tx.ifq = ifp->if_ifqs[i];
		sc->sc_q[i].rx.ifiq = ifp->if_iqs[i];

#if NKSTAT > 0
		vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i);
		vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i);
#endif
	}
}

int
vmxnet3_dma_init(struct vmxnet3_softc *sc)
{
	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;
	bus_addr_t ds_pa, qs_pa, mcast_pa;
	int i, queue, qs_len, intr;
	u_int major, minor, release_code, rev;

	qs_len = sc->sc_nqueues * (sizeof *ts + sizeof *rs);
	ts = vmxnet3_dma_allocmem(sc, qs_len, VMXNET3_DMADESC_ALIGN, &qs_pa);
	if (ts == NULL)
		return -1;
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		sc->sc_q[queue].tx.ts = ts++;
	rs = (void *)ts;
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		sc->sc_q[queue].rx.rs = rs++;

	for (queue = 0; queue < sc->sc_nqueues; queue++) {
		intr = sc->sc_q[queue].intr;

		if (vmxnet3_alloc_txring(sc, queue, intr))
			return -1;
		if (vmxnet3_alloc_rxring(sc, queue, intr))
			return -1;
	}

	sc->sc_mcast = vmxnet3_dma_allocmem(sc, 682 * ETHER_ADDR_LEN, 32, &mcast_pa);
	if (sc->sc_mcast == NULL)
		return -1;

	ds = vmxnet3_dma_allocmem(sc, sizeof *sc->sc_ds, 8, &ds_pa);
	if (ds == NULL)
		return -1;
	sc->sc_ds = ds;
	ds->magic = VMXNET3_REV1_MAGIC;
	ds->version = VMXNET3_DRIVER_VERSION;

	/*
	 * XXX FreeBSD version uses following values:
	 * (Does the device behavior depend on them?)
	 *
	 * major = __FreeBSD_version / 100000;
	 * minor = (__FreeBSD_version / 1000) % 100;
	 * release_code = (__FreeBSD_version / 100) % 10;
	 * rev = __FreeBSD_version % 100;
	 */
	major = 0;
	minor = 0;
	release_code = 0;
	rev = 0;
#ifdef __LP64__
	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_64BIT;
#else
	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_32BIT;
#endif
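	/*
	 * Illustrative layout of ds->guest given the shifts above (the
	 * VMXNET3_GOS_* constants presumably fill the low six bits):
	 *
	 *	bits 31-30	release_code
	 *	bits 29-22	rev
	 *	bits 21-14	major
	 *	bits 13-6	minor
	 *	bits  5-0	guest OS type and word size
	 */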
	ds->vmxnet3_revision = 1;
	ds->upt_version = 1;
	ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
	ds->driver_data = ~0ULL;
	ds->driver_data_len = 0;
	ds->queue_shared = qs_pa;
	ds->queue_shared_len = qs_len;
	ds->mtu = VMXNET3_MAX_MTU;
	ds->ntxqueue = sc->sc_nqueues;
	ds->nrxqueue = sc->sc_nqueues;
	ds->mcast_table = mcast_pa;
	ds->automask = 1;
	ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0);
	ds->evintr = 0;
	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < ds->nintr; i++)
		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;

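	/*
	 * With more than one queue, hand the device a Toeplitz RSS
	 * configuration so received flows are spread across the rx
	 * queues.  The indirection table simply round-robins the
	 * queues; with 4 queues it reads 0,1,2,3,0,1,2,3,...
	 */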
	if (sc->sc_nqueues > 1) {
		struct vmxnet3_upt1_rss_conf *rsscfg;
		bus_addr_t rss_pa;

		rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa);

		rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
		    UPT1_RSS_HASH_TYPE_IPV4 |
		    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
		    UPT1_RSS_HASH_TYPE_IPV6;
		rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
		rsscfg->hash_key_size = sizeof(rsscfg->hash_key);
		stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key));

		rsscfg->ind_table_size = sizeof(rsscfg->ind_table);
		for (i = 0; i < sizeof(rsscfg->ind_table); i++)
			rsscfg->ind_table[i] = i % sc->sc_nqueues;

		ds->upt_features |= UPT1_F_RSS;
		ds->rss.version = 1;
		ds->rss.len = sizeof(*rsscfg);
		ds->rss.paddr = rss_pa;

		sc->sc_rss = rsscfg;
	}

	WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa);
	WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32);
	return 0;
}

int
vmxnet3_alloc_txring(struct vmxnet3_softc *sc, int queue, int intr)
{
	struct vmxnet3_txqueue *tq = &sc->sc_q[queue].tx;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	bus_addr_t pa, comp_pa;
	int idx;

	ring->txd = vmxnet3_dma_allocmem(sc, NTXDESC * sizeof ring->txd[0], 512, &pa);
	if (ring->txd == NULL)
		return -1;
	comp_ring->txcd = vmxnet3_dma_allocmem(sc,
	    NTXCOMPDESC * sizeof comp_ring->txcd[0], 512, &comp_pa);
	if (comp_ring->txcd == NULL)
		return -1;

	for (idx = 0; idx < NTXDESC; idx++) {
		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
		    VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
			return -1;
	}

	ts = tq->ts;
	bzero(ts, sizeof *ts);
	ts->npending = 0;
	ts->intr_threshold = 1;
	ts->cmd_ring = pa;
	ts->cmd_ring_len = NTXDESC;
	ts->comp_ring = comp_pa;
	ts->comp_ring_len = NTXCOMPDESC;
	ts->driver_data = ~0ULL;
	ts->driver_data_len = 0;
	ts->intr_idx = intr;
	ts->stopped = 1;
	ts->error = 0;
	return 0;
}

int
vmxnet3_alloc_rxring(struct vmxnet3_softc *sc, int queue, int intr)
{
	struct vmxnet3_rxqueue *rq = &sc->sc_q[queue].rx;
	struct vmxnet3_rxq_shared *rs;
	struct vmxnet3_rxring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	bus_addr_t pa[2], comp_pa;
	int i, idx;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->rxd = vmxnet3_dma_allocmem(sc, NRXDESC * sizeof ring->rxd[0],
		    512, &pa[i]);
		if (ring->rxd == NULL)
			return -1;
	}
	comp_ring = &rq->comp_ring;
	comp_ring->rxcd = vmxnet3_dma_allocmem(sc,
	    NRXCOMPDESC * sizeof comp_ring->rxcd[0], 512, &comp_pa);
	if (comp_ring->rxcd == NULL)
		return -1;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->sc = sc;
		ring->rid = i;
		mtx_init(&ring->mtx, IPL_NET);
		timeout_set(&ring->refill, vmxnet3_rxfill_tick, ring);
		for (idx = 0; idx < NRXDESC; idx++) {
			if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, 1,
			    JUMBO_LEN, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
				return -1;
		}
	}

	rs = rq->rs;
	bzero(rs, sizeof *rs);
	rs->cmd_ring[0] = pa[0];
	rs->cmd_ring[1] = pa[1];
	rs->cmd_ring_len[0] = NRXDESC;
	rs->cmd_ring_len[1] = NRXDESC;
	rs->comp_ring = comp_pa;
	rs->comp_ring_len = NRXCOMPDESC;
	rs->driver_data = ~0ULL;
	rs->driver_data_len = 0;
	rs->intr_idx = intr;
	rs->stopped = 1;
	rs->error = 0;
	return 0;
}

void
vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;

	ring->cons = ring->prod = 0;
	ring->gen = VMX_TX_GEN;
	comp_ring->next = 0;
	comp_ring->gen = VMX_TXC_GEN;
	bzero(ring->txd, NTXDESC * sizeof ring->txd[0]);
	bzero(comp_ring->txcd, NTXCOMPDESC * sizeof comp_ring->txcd[0]);
}

void
vmxnet3_rxfill_tick(void *arg)
{
	struct vmxnet3_rxring *ring = arg;

	if (!mtx_enter_try(&ring->mtx))
		return;

	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);
}

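/*
 * Post receive buffers to a command ring.  Each free slot gets a
 * JUMBO_LEN cluster; the descriptor is filled in and only then handed
 * to the device by writing the ring's current generation bit after a
 * membar_producer().  If the ring ends up with no buffers posted at
 * all, a timeout retries the fill later.
 */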
void
vmxnet3_rxfill(struct vmxnet3_rxring *ring)
{
	struct vmxnet3_softc *sc = ring->sc;
	struct vmxnet3_rxdesc *rxd;
	struct mbuf *m;
	bus_dmamap_t map;
	u_int slots;
	unsigned int prod;
	uint32_t rgen;
	uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);

	MUTEX_ASSERT_LOCKED(&ring->mtx);

	prod = ring->fill;
	rgen = ring->gen;

	for (slots = if_rxr_get(&ring->rxr, NRXDESC); slots > 0; slots--) {
		KASSERT(ring->m[prod] == NULL);

		m = MCLGETL(NULL, M_DONTWAIT, JUMBO_LEN);
		if (m == NULL)
			break;

		m->m_pkthdr.len = m->m_len = JUMBO_LEN;
		m_adj(m, ETHER_ALIGN);

		map = ring->dmap[prod];
		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
			panic("load mbuf");

		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_PREREAD);

		ring->m[prod] = m;

		rxd = &ring->rxd[prod];
		rxd->rx_addr = htole64(DMAADDR(map));
		membar_producer();
		rxd->rx_word2 = (htole32(m->m_pkthdr.len & VMXNET3_RX_LEN_M) <<
		    VMXNET3_RX_LEN_S) | type | rgen;

		if (++prod == NRXDESC) {
			prod = 0;
			rgen ^= VMX_RX_GEN;
		}
	}
	if_rxr_put(&ring->rxr, slots);

	ring->fill = prod;
	ring->gen = rgen;

	if (if_rxr_inuse(&ring->rxr) == 0)
		timeout_add(&ring->refill, 1);
}

void
vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct vmxnet3_rxring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int i;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		ring->fill = 0;
		ring->gen = VMX_RX_GEN;
		bzero(ring->rxd, NRXDESC * sizeof ring->rxd[0]);
		if_rxr_init(&ring->rxr, 2, NRXDESC - 1);
	}

	/* XXX only fill ring 0 */
	ring = &rq->cmd_ring[0];
	mtx_enter(&ring->mtx);
	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);

	comp_ring = &rq->comp_ring;
	comp_ring->next = 0;
	comp_ring->gen = VMX_RXC_GEN;
	bzero(comp_ring->rxcd, NRXCOMPDESC * sizeof comp_ring->rxcd[0]);
}

void
vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	int idx;

	for (idx = 0; idx < NTXDESC; idx++) {
		if (ring->m[idx]) {
			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
			m_freem(ring->m[idx]);
			ring->m[idx] = NULL;
		}
	}
}

void
vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct vmxnet3_rxring *ring;
	int i, idx;

	for (i = 0; i < 2; i++) {
		ring = &rq->cmd_ring[i];
		timeout_del(&ring->refill);
		for (idx = 0; idx < NRXDESC; idx++) {
			struct mbuf *m = ring->m[idx];
			if (m == NULL)
				continue;

			ring->m[idx] = NULL;
			m_freem(m);
			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
		}
	}
}

void
vmxnet3_link_state(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	u_int x, link, speed;

	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
	speed = x >> 16;
	if (x & 1) {
		ifp->if_baudrate = IF_Mbps(speed);
		link = LINK_STATE_UP;
	} else
		link = LINK_STATE_DOWN;

	if (ifp->if_link_state != link) {
		ifp->if_link_state = link;
		if_link_state_change(ifp);
	}
}

static inline void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}

static inline void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}

void
vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
	vmxnet3_enable_intr(sc, 0);
	if (sc->sc_intrmap) {
		for (i = 0; i < sc->sc_nqueues; i++)
			vmxnet3_enable_intr(sc, sc->sc_q[i].intr);
	}
}

void
vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
	vmxnet3_disable_intr(sc, 0);
	if (sc->sc_intrmap) {
		for (i = 0; i < sc->sc_nqueues; i++)
			vmxnet3_disable_intr(sc, sc->sc_q[i].intr);
	}
}

int
vmxnet3_intr_intx(void *arg)
{
	struct vmxnet3_softc *sc = arg;

	if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
		return 0;

	return (vmxnet3_intr(sc));
}

int
vmxnet3_intr(void *arg)
{
	struct vmxnet3_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;

	if (sc->sc_ds->event) {
		KERNEL_LOCK();
		vmxnet3_evintr(sc);
		KERNEL_UNLOCK();
	}

	if (ifp->if_flags & IFF_RUNNING) {
		vmxnet3_rxintr(sc, &sc->sc_q[0].rx);
		vmxnet3_txintr(sc, &sc->sc_q[0].tx);
		vmxnet3_enable_intr(sc, 0);
	}

	return 1;
}

int
vmxnet3_intr_event(void *arg)
{
	struct vmxnet3_softc *sc = arg;

	if (sc->sc_ds->event) {
		KERNEL_LOCK();
		vmxnet3_evintr(sc);
		KERNEL_UNLOCK();
	}

	vmxnet3_enable_intr(sc, 0);
	return 1;
}

int
vmxnet3_intr_queue(void *arg)
{
	struct vmxnet3_queue *q = arg;

	vmxnet3_rxintr(q->sc, &q->rx);
	vmxnet3_txintr(q->sc, &q->tx);
	vmxnet3_enable_intr(q->sc, q->intr);

	return 1;
}

void
vmxnet3_evintr(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	u_int event = sc->sc_ds->event;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;

	/* Clear events. */
	WRITE_BAR1(sc, VMXNET3_BAR1_EVENT, event);

	/* Link state change? */
	if (event & VMXNET3_EVENT_LINK)
		vmxnet3_link_state(sc);

	/* Queue error? */
	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
		WRITE_CMD(sc, VMXNET3_CMD_GET_STATUS);

		ts = sc->sc_q[0].tx.ts;
		if (ts->stopped)
			printf("%s: TX error 0x%x\n", ifp->if_xname, ts->error);
		rs = sc->sc_q[0].rx.rs;
		if (rs->stopped)
			printf("%s: RX error 0x%x\n", ifp->if_xname, rs->error);
		vmxnet3_init(sc);
	}

	if (event & VMXNET3_EVENT_DIC)
		printf("%s: device implementation change event\n",
		    ifp->if_xname);
	if (event & VMXNET3_EVENT_DEBUG)
		printf("%s: debug event\n", ifp->if_xname);
}

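/*
 * Transmit completion: walk the completion ring while the generation
 * bit matches, free the mbuf and unload the dma map for each completed
 * packet, and advance the consumer index to just past the end-of-packet
 * descriptor reported by the device.
 */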
void
vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
{
	struct ifqueue *ifq = tq->ifq;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_txcompdesc *txcd;
	bus_dmamap_t map;
	struct mbuf *m;
	u_int cons, next;
	uint32_t rgen;

	cons = ring->cons;
	if (cons == ring->prod)
		return;

	next = comp_ring->next;
	rgen = comp_ring->gen;

	/* postread */
	for (;;) {
		txcd = &comp_ring->txcd[next];
		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
			break;

		if (++next == NTXCOMPDESC) {
			next = 0;
			rgen ^= VMX_TXC_GEN;
		}

		m = ring->m[cons];
		ring->m[cons] = NULL;

		KASSERT(m != NULL);

		map = ring->dmap[cons];
		bus_dmamap_unload(sc->sc_dmat, map);
		m_freem(m);

		cons = (letoh32(txcd->txc_word0) >> VMXNET3_TXC_EOPIDX_S) &
		    VMXNET3_TXC_EOPIDX_M;
		cons++;
		cons %= NTXDESC;
	}
	/* preread */

	comp_ring->next = next;
	comp_ring->gen = rgen;
	ring->cons = cons;

	if (ifq_is_oactive(ifq))
		ifq_restart(ifq);
}

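/*
 * Receive completion: each completion descriptor names the command ring
 * (ring 0, or the second ring which this driver does not currently
 * fill) and the slot the packet came from.  Completed mbufs are
 * annotated with checksum, VLAN and RSS information, collected on a
 * local mbuf_list and passed to the stack in one go, after which ring 0
 * is refilled.
 */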
void
vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
	struct vmxnet3_rxring *ring;
	struct vmxnet3_rxcompdesc *rxcd;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	bus_dmamap_t map;
	unsigned int idx, len;
	unsigned int next, rgen;
	unsigned int done = 0;

	next = comp_ring->next;
	rgen = comp_ring->gen;

	for (;;) {
		rxcd = &comp_ring->rxcd[next];
		if ((rxcd->rxc_word3 & VMX_RXC_GEN) != rgen)
			break;

		if (++next == NRXCOMPDESC) {
			next = 0;
			rgen ^= VMX_RXC_GEN;
		}

		idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
		    VMXNET3_RXC_IDX_M);
		if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
		    VMXNET3_RXC_QID_M) < sc->sc_nqueues)
			ring = &rq->cmd_ring[0];
		else
			ring = &rq->cmd_ring[1];

		m = ring->m[idx];
		KASSERT(m != NULL);
		ring->m[idx] = NULL;

		map = ring->dmap[idx];
		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->sc_dmat, map);

		done++;

		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
			ifp->if_ierrors++;
			m_freem(m);
			goto skip_buffer;
		}

		len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
		    VMXNET3_RXC_LEN_M);
		if (len < VMXNET3_MIN_MTU) {
			m_freem(m);
			goto skip_buffer;
		}
		m->m_pkthdr.len = m->m_len = len;

		vmxnet3_rx_csum(rxcd, m);
		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_VLAN)) {
			m->m_flags |= M_VLANTAG;
			m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >>
			    VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M);
		}
		if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
		    VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
			m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1);
			SET(m->m_pkthdr.csum_flags, M_FLOWID);
		}

		ml_enqueue(&ml, m);

skip_buffer:
		if (rq->rs->update_rxhead) {
			u_int qid = letoh32((rxcd->rxc_word0 >>
			    VMXNET3_RXC_QID_S) & VMXNET3_RXC_QID_M);

			idx = (idx + 1) % NRXDESC;
			if (qid < sc->sc_nqueues) {
				WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(qid), idx);
			} else {
				qid -= sc->sc_nqueues;
				WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(qid), idx);
			}
		}
	}

	comp_ring->next = next;
	comp_ring->gen = rgen;

	if (done == 0)
		return;

	ring = &rq->cmd_ring[0];

	if (ifiq_input(rq->ifiq, &ml))
		if_rxr_livelocked(&ring->rxr);

	/* XXX Should we (try to) allocate buffers for ring 2 too? */
	mtx_enter(&ring->mtx);
	if_rxr_put(&ring->rxr, done);
	vmxnet3_rxfill(ring);
	mtx_leave(&ring->mtx);
}

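/*
 * Program the receive filter.  Multicast addresses are copied into the
 * shared table (up to 682 entries, i.e. 4092 bytes, presumably sized so
 * the table fits in a page); beyond that limit, or with address ranges
 * or IFF_PROMISC set, the interface falls back to ALLMULTI/promiscuous
 * mode.
 */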
void
vmxnet3_iff(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	struct arpcom *ac = &sc->sc_arpcom;
	struct vmxnet3_driver_shared *ds = sc->sc_ds;
	struct ether_multi *enm;
	struct ether_multistep step;
	u_int mode;
	u_int8_t *p;

	ds->mcast_tablelen = 0;
	CLR(ifp->if_flags, IFF_ALLMULTI);

	/*
	 * Always accept broadcast frames.
	 * Always accept frames destined to our station address.
	 */
	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;

	if (ISSET(ifp->if_flags, IFF_PROMISC) || ac->ac_multirangecnt > 0 ||
	    ac->ac_multicnt > 682) {
		SET(ifp->if_flags, IFF_ALLMULTI);
		SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
		if (ifp->if_flags & IFF_PROMISC)
			SET(mode, VMXNET3_RXMODE_PROMISC);
	} else {
		p = sc->sc_mcast;
		ETHER_FIRST_MULTI(step, ac, enm);
		while (enm != NULL) {
			bcopy(enm->enm_addrlo, p, ETHER_ADDR_LEN);

			p += ETHER_ADDR_LEN;

			ETHER_NEXT_MULTI(step, enm);
		}

		if (ac->ac_multicnt > 0) {
			SET(mode, VMXNET3_RXMODE_MCAST);
			ds->mcast_tablelen = p - sc->sc_mcast;
		}
	}

	WRITE_CMD(sc, VMXNET3_CMD_SET_FILTER);
	ds->rxmode = mode;
	WRITE_CMD(sc, VMXNET3_CMD_SET_RXMODE);
}


void
vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
{
	if (letoh32(rxcd->rxc_word0 & VMXNET3_RXC_NOCSUM))
		return;

	if ((rxcd->rxc_word3 & (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK)) ==
	    (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK))
		m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;

	if (rxcd->rxc_word3 & VMXNET3_RXC_FRAGMENT)
		return;

	if (rxcd->rxc_word3 & (VMXNET3_RXC_TCP | VMXNET3_RXC_UDP)) {
		if (rxcd->rxc_word3 & VMXNET3_RXC_CSUM_OK)
			m->m_pkthdr.csum_flags |=
			    M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
	}
}

void
vmxnet3_stop(struct ifnet *ifp)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	int queue;

	ifp->if_flags &= ~IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);
	ifp->if_timer = 0;

	vmxnet3_disable_all_intrs(sc);

	WRITE_CMD(sc, VMXNET3_CMD_DISABLE);

	if (sc->sc_intrmap != NULL) {
		for (queue = 0; queue < sc->sc_nqueues; queue++)
			intr_barrier(sc->sc_q[queue].ih);
	} else
		intr_barrier(sc->sc_ih);

	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_txstop(sc, &sc->sc_q[queue].tx);
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_rxstop(sc, &sc->sc_q[queue].rx);
}

void
vmxnet3_reset(struct vmxnet3_softc *sc)
{
	WRITE_CMD(sc, VMXNET3_CMD_RESET);
}

int
vmxnet3_init(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
	int queue;

	/*
	 * Cancel pending I/O and free all RX/TX buffers.
	 */
	vmxnet3_stop(ifp);

#if 0
	/* Put controller into known state. */
	vmxnet3_reset(sc);
#endif

	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_txinit(sc, &sc->sc_q[queue].tx);
	for (queue = 0; queue < sc->sc_nqueues; queue++)
		vmxnet3_rxinit(sc, &sc->sc_q[queue].rx);

	for (queue = 0; queue < sc->sc_nqueues; queue++) {
		WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
		WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
	}

	WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
	if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
		printf("%s: failed to initialize\n", ifp->if_xname);
		vmxnet3_stop(ifp);
		return EIO;
	}

	/* Program promiscuous mode and multicast filters. */
	vmxnet3_iff(sc);

	vmxnet3_enable_all_intrs(sc);

	vmxnet3_link_state(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);

	return 0;
}

static int
vmx_rxr_info(struct vmxnet3_softc *sc, struct if_rxrinfo *ifri)
{
	struct if_rxring_info *ifrs, *ifr;
	int error;
	unsigned int i;

	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
	if (ifrs == NULL)
		return (ENOMEM);

	for (i = 0; i < sc->sc_nqueues; i++) {
		struct if_rxring *rxr = &sc->sc_q[i].rx.cmd_ring[0].rxr;
		ifr = &ifrs[i];

		ifr->ifr_size = JUMBO_LEN;
		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
		ifr->ifr_info = *rxr;
	}

	error = if_rxr_info_ioctl(ifri, i, ifrs);

	free(ifrs, M_TEMP, i * sizeof(*ifrs));

	return (error);
}

int
vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			error = vmxnet3_init(sc);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING)
				error = ENETRESET;
			else
				error = vmxnet3_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vmxnet3_stop(ifp);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFRXR:
		error = vmx_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
		break;
	default:
		error = ether_ioctl(ifp, &sc->sc_arpcom, cmd, data);
	}

	if (error == ENETRESET) {
		if (ifp->if_flags & IFF_RUNNING)
			vmxnet3_iff(sc);
		error = 0;
	}

	splx(s);
	return error;
}

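/*
 * Load an mbuf chain for transmit.  If it needs more segments than the
 * dma map allows (NTXSEGS), the load fails with EFBIG; the chain is
 * then compacted with m_defrag() and loaded once more.
 */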
static inline int
vmx_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
{
	int error;

	error = bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
	if (error != EFBIG)
		return (error);

	error = m_defrag(m, M_DONTWAIT);
	if (error != 0)
		return (error);

	return (bus_dmamap_load_mbuf(dmat, map, m,
	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
}

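/*
 * Transmit.  Each packet consumes one descriptor per DMA segment; the
 * first descriptor is written with the inverted generation bit and only
 * flipped to the live value, after a membar_producer(), once the whole
 * chain is filled in, so the device never sees a partially written
 * packet.  The doorbell write at the end tells the device how far the
 * ring has been produced.
 */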
void
vmxnet3_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vmxnet3_softc *sc = ifp->if_softc;
	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
	struct vmxnet3_txring *ring = &tq->cmd_ring;
	struct vmxnet3_txdesc *txd, *sop;
	bus_dmamap_t map;
	unsigned int prod, free, i;
	unsigned int post = 0;
	uint32_t rgen, gen;

	struct mbuf *m;

	free = ring->cons;
	prod = ring->prod;
	if (free <= prod)
		free += NTXDESC;
	free -= prod;

	rgen = ring->gen;

	for (;;) {
		if (free <= NTXSEGS) {
			ifq_set_oactive(ifq);
			break;
		}

		m = ifq_dequeue(ifq);
		if (m == NULL)
			break;

		map = ring->dmap[prod];

		if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) {
			ifq->ifq_errors++;
			m_freem(m);
			continue;
		}

#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		ring->m[prod] = m;

		bus_dmamap_sync(sc->sc_dmat, map, 0,
		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);

		gen = rgen ^ VMX_TX_GEN;
		sop = &ring->txd[prod];
		for (i = 0; i < map->dm_nsegs; i++) {
			txd = &ring->txd[prod];
			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
			    VMXNET3_TX_LEN_S) | gen;
			txd->tx_word3 = 0;

			if (++prod == NTXDESC) {
				prod = 0;
				rgen ^= VMX_TX_GEN;
			}

			gen = rgen;
		}
		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);

		if (ISSET(m->m_flags, M_VLANTAG)) {
			sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
			sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
			    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
		}

		/* Change the ownership by flipping the "generation" bit */
		membar_producer();
		sop->tx_word2 ^= VMX_TX_GEN;

		free -= i;
		post = 1;
	}

	if (!post)
		return;

	ring->prod = prod;
	ring->gen = rgen;

	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), prod);
}

void
vmxnet3_watchdog(struct ifnet *ifp)
{
	struct vmxnet3_softc *sc = ifp->if_softc;
	int s;

	printf("%s: device timeout\n", ifp->if_xname);
	s = splnet();
	vmxnet3_init(sc);
	splx(s);
}

void
vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct vmxnet3_softc *sc = ifp->if_softc;

	vmxnet3_link_state(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (ifp->if_link_state != LINK_STATE_UP)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (ifp->if_baudrate >= IF_Gbps(10))
		ifmr->ifm_active |= IFM_10G_T;
}

int
vmxnet3_media_change(struct ifnet *ifp)
{
	return 0;
}

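/*
 * Allocate a single physically contiguous, zeroed DMA buffer and return
 * its kernel va, with the bus address in *pa.  The map is only used to
 * learn the device address and is torn down again; the memory itself is
 * never freed, which is fine for these allocations that live as long as
 * the driver is attached.
 */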
void *
vmxnet3_dma_allocmem(struct vmxnet3_softc *sc, u_int size, u_int align, bus_addr_t *pa)
{
	bus_dma_tag_t t = sc->sc_dmat;
	bus_dma_segment_t segs[1];
	bus_dmamap_t map;
	caddr_t va;
	int n;

	if (bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT))
		return NULL;
	if (bus_dmamem_map(t, segs, 1, size, &va, BUS_DMA_NOWAIT))
		return NULL;
	if (bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map))
		return NULL;
	if (bus_dmamap_load(t, map, va, size, NULL, BUS_DMA_NOWAIT))
		return NULL;
	bzero(va, size);
	*pa = DMAADDR(map);
	bus_dmamap_unload(t, map);
	bus_dmamap_destroy(t, map);
	return va;
}

#if NKSTAT > 0
/*
 * "hardware" counters are exported as separate kstats for each tx
 * and rx ring, but the request for the hypervisor to update the
 * stats is done once at the controller level. we limit the number
 * of updates at the controller level to a rate of one per second to
 * debounce this a bit.
 */
static const struct timeval vmx_kstat_rate = { 1, 0 };

/*
 * all the vmx stats are 64 bit counters, we just need their name and units.
 */
struct vmx_kstat_tpl {
	const char		*name;
	enum kstat_kv_unit	 unit;
};

static const struct vmx_kstat_tpl vmx_rx_kstat_tpl[UPT1_RxStats_count] = {
	{ "LRO packets",	KSTAT_KV_U_PACKETS },
	{ "LRO bytes",		KSTAT_KV_U_BYTES },
	{ "ucast packets",	KSTAT_KV_U_PACKETS },
	{ "ucast bytes",	KSTAT_KV_U_BYTES },
	{ "mcast packets",	KSTAT_KV_U_PACKETS },
	{ "mcast bytes",	KSTAT_KV_U_BYTES },
	{ "bcast packets",	KSTAT_KV_U_PACKETS },
	{ "bcast bytes",	KSTAT_KV_U_BYTES },
	{ "no buffers",		KSTAT_KV_U_PACKETS },
	{ "errors",		KSTAT_KV_U_PACKETS },
};

static const struct vmx_kstat_tpl vmx_tx_kstat_tpl[UPT1_TxStats_count] = {
	{ "TSO packets",	KSTAT_KV_U_PACKETS },
	{ "TSO bytes",		KSTAT_KV_U_BYTES },
	{ "ucast packets",	KSTAT_KV_U_PACKETS },
	{ "ucast bytes",	KSTAT_KV_U_BYTES },
	{ "mcast packets",	KSTAT_KV_U_PACKETS },
	{ "mcast bytes",	KSTAT_KV_U_BYTES },
	{ "bcast packets",	KSTAT_KV_U_PACKETS },
	{ "bcast bytes",	KSTAT_KV_U_BYTES },
	{ "errors",		KSTAT_KV_U_PACKETS },
	{ "discards",		KSTAT_KV_U_PACKETS },
};

static void
vmx_kstat_init(struct vmxnet3_softc *sc)
{
	rw_init(&sc->sc_kstat_lock, "vmxkstat");
}

static int
vmx_kstat_read(struct kstat *ks)
{
	struct vmxnet3_softc *sc = ks->ks_softc;
	struct kstat_kv *kvs = ks->ks_data;
	uint64_t *vs = ks->ks_ptr;
	unsigned int n, i;

	if (ratecheck(&sc->sc_kstat_updated, &vmx_kstat_rate)) {
		WRITE_CMD(sc, VMXNET3_CMD_GET_STATS);
		/* barrier? */
	}

	n = ks->ks_datalen / sizeof(*kvs);
	for (i = 0; i < n; i++)
		kstat_kv_u64(&kvs[i]) = lemtoh64(&vs[i]);

	TIMEVAL_TO_TIMESPEC(&sc->sc_kstat_updated, &ks->ks_updated);

	return (0);
}

static struct kstat *
vmx_kstat_create(struct vmxnet3_softc *sc, const char *name, unsigned int unit,
    const struct vmx_kstat_tpl *tpls, unsigned int n, uint64_t *vs)
{
	struct kstat *ks;
	struct kstat_kv *kvs;
	unsigned int i;

	ks = kstat_create(sc->sc_dev.dv_xname, 0, name, unit,
	    KSTAT_T_KV, 0);
	if (ks == NULL)
		return (NULL);

	kvs = mallocarray(n, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
	for (i = 0; i < n; i++) {
		const struct vmx_kstat_tpl *tpl = &tpls[i];

		kstat_kv_unit_init(&kvs[i], tpl->name,
		    KSTAT_KV_T_COUNTER64, tpl->unit);
	}

	ks->ks_softc = sc;
	kstat_set_wlock(ks, &sc->sc_kstat_lock);
	ks->ks_ptr = vs;
	ks->ks_data = kvs;
	ks->ks_datalen = n * sizeof(*kvs);
	ks->ks_read = vmx_kstat_read;
	TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &ks->ks_interval);

	kstat_install(ks);

	return (ks);
}

static void
vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq,
    int unit)
{
	tq->txkstat = vmx_kstat_create(sc, "vmx-txstats", unit,
	    vmx_tx_kstat_tpl, nitems(vmx_tx_kstat_tpl), tq->ts->stats);
}

static void
vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq,
    int unit)
{
	rq->rxkstat = vmx_kstat_create(sc, "vmx-rxstats", unit,
	    vmx_rx_kstat_tpl, nitems(vmx_rx_kstat_tpl), rq->rs->stats);
}
#endif /* NKSTAT > 0 */
1594