xref: /netbsd-src/sys/dev/pci/if_vmx.c (revision afab4e300d3a9fb07dd8c80daf53d0feb3345706)
1 /*	$NetBSD: if_vmx.c,v 1.11 2022/09/16 07:55:34 knakahara Exp $	*/
2 /*	$OpenBSD: if_vmx.c,v 1.16 2014/01/22 06:04:17 brad Exp $	*/
3 
4 /*
5  * Copyright (c) 2013 Tsubai Masanari
6  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __KERNEL_RCSID(0, "$NetBSD: if_vmx.c,v 1.11 2022/09/16 07:55:34 knakahara Exp $");
23 
24 #ifdef _KERNEL_OPT
25 #include "opt_if_vmx.h"
26 #endif
27 
28 #include <sys/param.h>
29 #include <sys/cpu.h>
30 #include <sys/kernel.h>
31 #include <sys/kmem.h>
32 #include <sys/bitops.h>
33 #include <sys/bus.h>
34 #include <sys/device.h>
35 #include <sys/mbuf.h>
36 #include <sys/module.h>
37 #include <sys/sockio.h>
38 #include <sys/pcq.h>
39 #include <sys/workqueue.h>
40 #include <sys/interrupt.h>
41 
42 #include <net/bpf.h>
43 #include <net/if.h>
44 #include <net/if_ether.h>
45 #include <net/if_media.h>
46 
47 #include <netinet/if_inarp.h>
48 #include <netinet/in_systm.h>	/* for <netinet/ip.h> */
49 #include <netinet/in.h>		/* for <netinet/ip.h> */
50 #include <netinet/ip.h>		/* for struct ip */
51 #include <netinet/ip6.h>	/* for struct ip6_hdr */
52 #include <netinet/tcp.h>	/* for struct tcphdr */
53 #include <netinet/udp.h>	/* for struct udphdr */
54 
55 #include <dev/pci/pcivar.h>
56 #include <dev/pci/pcireg.h>
57 #include <dev/pci/pcidevs.h>
58 
59 #include <dev/pci/if_vmxreg.h>
60 
61 #define VMXNET3_DRIVER_VERSION 0x00010000
62 
63 /*
64  * Max descriptors per Tx packet. We must limit the size of
65  * any TSO packet based on the number of segments.
66  */
67 #define VMXNET3_TX_MAXSEGS		32
68 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
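/*
 * A rough sanity check on these values (a sketch, assuming the usual
 * MCLBYTES of 2048): 32 segments of at most one cluster each give a
 * 64KB ceiling per packet, enough to cover the largest TSO payload we
 * can queue.
 */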
69 
70 /*
71  * Maximum supported Tx segment size. The length field in the
72  * Tx descriptor is 14 bits.
73  */
74 #define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
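/*
 * Note: 1 << 14 is 16384. The full power-of-two size is usable because
 * the device is understood to treat a descriptor length of zero as a
 * 16KB segment (an assumption carried over from other vmxnet3 drivers).
 */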
75 
76 /*
77  * The maximum number of Rx segments we accept.
78  */
79 #define VMXNET3_MAX_RX_SEGS		0	/* no segments */
80 
81 /*
82  * Predetermined size of the multicast MACs filter table. If the
83  * number of multicast addresses exceeds this size, then the
84  * ALL_MULTI mode is used instead.
85  */
86 #define VMXNET3_MULTICAST_MAX		32
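/*
 * Example: once a 33rd multicast address is joined, the table can no
 * longer hold every address and the driver simply accepts all multicast
 * traffic instead of filtering per address.
 */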
87 
88 /*
89  * Our Tx watchdog timeout.
90  */
91 #define VMXNET3_WATCHDOG_TIMEOUT	5
92 
93 /*
94  * Default values for vmx_intr_{rx,tx}_process_limit, the maximum
95  * number of packets to process per run of the interrupt handler.
96  */
97 #define VMXNET3_RX_INTR_PROCESS_LIMIT 0U
98 #define VMXNET3_TX_INTR_PROCESS_LIMIT 256
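/*
 * The Tx limit of 256 bounds the work done directly in interrupt context;
 * the Rx limit of 0 is taken to mean the interrupt handler processes no
 * Rx packets itself and immediately defers to softint/workqueue (an
 * assumption based on how similar NetBSD drivers use these limits).
 */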
99 
100 /*
101  * Default values for vmx_{rx,tx}_process_limit, the maximum number
102  * of packets to process per run of deferred (softint/workqueue) processing.
103  */
104 #define VMXNET3_RX_PROCESS_LIMIT 256
105 #define VMXNET3_TX_PROCESS_LIMIT 256
106 
107 #define VMXNET3_WORKQUEUE_PRI PRI_SOFTNET
108 
109 /*
110  * IP protocols that we can perform Tx checksum offloading of.
111  */
112 #define VMXNET3_CSUM_OFFLOAD \
113     (M_CSUM_TCPv4 | M_CSUM_UDPv4)
114 #define VMXNET3_CSUM_OFFLOAD_IPV6 \
115     (M_CSUM_TCPv6 | M_CSUM_UDPv6)
116 
117 #define VMXNET3_CSUM_ALL_OFFLOAD \
118     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)
119 
120 #define VMXNET3_RXRINGS_PERQ 2
121 
122 #define VMXNET3_CORE_LOCK(_sc)		mutex_enter((_sc)->vmx_mtx)
123 #define VMXNET3_CORE_UNLOCK(_sc)	mutex_exit((_sc)->vmx_mtx)
124 #define VMXNET3_CORE_LOCK_ASSERT(_sc)	mutex_owned((_sc)->vmx_mtx)
125 
126 #define VMXNET3_RXQ_LOCK(_rxq)		mutex_enter((_rxq)->vxrxq_mtx)
127 #define VMXNET3_RXQ_UNLOCK(_rxq)	mutex_exit((_rxq)->vxrxq_mtx)
128 #define VMXNET3_RXQ_LOCK_ASSERT(_rxq)		\
129     mutex_owned((_rxq)->vxrxq_mtx)
130 
131 #define VMXNET3_TXQ_LOCK(_txq)		mutex_enter((_txq)->vxtxq_mtx)
132 #define VMXNET3_TXQ_TRYLOCK(_txq)	mutex_tryenter((_txq)->vxtxq_mtx)
133 #define VMXNET3_TXQ_UNLOCK(_txq)	mutex_exit((_txq)->vxtxq_mtx)
134 #define VMXNET3_TXQ_LOCK_ASSERT(_txq)		\
135     mutex_owned((_txq)->vxtxq_mtx)
136 
137 struct vmxnet3_dma_alloc {
138 	bus_addr_t dma_paddr;
139 	void *dma_vaddr;
140 	bus_dmamap_t dma_map;
141 	bus_size_t dma_size;
142 	bus_dma_segment_t dma_segs[1];
143 };
144 
145 struct vmxnet3_txbuf {
146 	bus_dmamap_t vtxb_dmamap;
147 	struct mbuf *vtxb_m;
148 };
149 
150 struct vmxnet3_txring {
151 	struct vmxnet3_txbuf *vxtxr_txbuf;
152 	struct vmxnet3_txdesc *vxtxr_txd;
153 	u_int vxtxr_head;
154 	u_int vxtxr_next;
155 	u_int vxtxr_ndesc;
156 	int vxtxr_gen;
157 	struct vmxnet3_dma_alloc vxtxr_dma;
158 };
159 
160 struct vmxnet3_rxbuf {
161 	bus_dmamap_t vrxb_dmamap;
162 	struct mbuf *vrxb_m;
163 };
164 
165 struct vmxnet3_rxring {
166 	struct vmxnet3_rxbuf *vxrxr_rxbuf;
167 	struct vmxnet3_rxdesc *vxrxr_rxd;
168 	u_int vxrxr_fill;
169 	u_int vxrxr_ndesc;
170 	int vxrxr_gen;
171 	int vxrxr_rid;
172 	struct vmxnet3_dma_alloc vxrxr_dma;
173 	bus_dmamap_t vxrxr_spare_dmap;
174 };
175 
176 struct vmxnet3_comp_ring {
177 	union {
178 		struct vmxnet3_txcompdesc *txcd;
179 		struct vmxnet3_rxcompdesc *rxcd;
180 	} vxcr_u;
181 	u_int vxcr_next;
182 	u_int vxcr_ndesc;
183 	int vxcr_gen;
184 	struct vmxnet3_dma_alloc vxcr_dma;
185 };
186 
187 struct vmxnet3_txq_stats {
188 	uint64_t vmtxs_csum;
189 	uint64_t vmtxs_tso;
190 	uint64_t vmtxs_full;
191 	uint64_t vmtxs_offload_failed;
192 };
193 
194 struct vmxnet3_txqueue {
195 	kmutex_t *vxtxq_mtx;
196 	struct vmxnet3_softc *vxtxq_sc;
197 	int vxtxq_watchdog;
198 	pcq_t *vxtxq_interq;
199 	struct vmxnet3_txring vxtxq_cmd_ring;
200 	struct vmxnet3_comp_ring vxtxq_comp_ring;
201 	struct vmxnet3_txq_stats vxtxq_stats;
202 	struct vmxnet3_txq_shared *vxtxq_ts;
203 	char vxtxq_name[16];
204 
205 	void *vxtxq_si;
206 
207 	struct evcnt vxtxq_intr;
208 	struct evcnt vxtxq_defer;
209 	struct evcnt vxtxq_deferreq;
210 	struct evcnt vxtxq_pcqdrop;
211 	struct evcnt vxtxq_transmitdef;
212 	struct evcnt vxtxq_watchdogto;
213 	struct evcnt vxtxq_defragged;
214 	struct evcnt vxtxq_defrag_failed;
215 };
216 
217 
218 struct vmxnet3_rxqueue {
219 	kmutex_t *vxrxq_mtx;
220 	struct vmxnet3_softc *vxrxq_sc;
221 	struct mbuf *vxrxq_mhead;
222 	struct mbuf *vxrxq_mtail;
223 	struct vmxnet3_rxring vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
224 	struct vmxnet3_comp_ring vxrxq_comp_ring;
225 	struct vmxnet3_rxq_shared *vxrxq_rs;
226 	char vxrxq_name[16];
227 
228 	struct evcnt vxrxq_intr;
229 	struct evcnt vxrxq_defer;
230 	struct evcnt vxrxq_deferreq;
231 	struct evcnt vxrxq_mgetcl_failed;
232 	struct evcnt vxrxq_mbuf_load_failed;
233 };
234 
235 struct vmxnet3_queue {
236 	int vxq_id;
237 	int vxq_intr_idx;
238 
239 	struct vmxnet3_txqueue vxq_txqueue;
240 	struct vmxnet3_rxqueue vxq_rxqueue;
241 
242 	void *vxq_si;
243 	bool vxq_workqueue;
244 	bool vxq_wq_enqueued;
245 	struct work vxq_wq_cookie;
246 };
247 
248 struct vmxnet3_softc {
249 	device_t vmx_dev;
250 	struct ethercom vmx_ethercom;
251 	struct ifmedia vmx_media;
252 	struct vmxnet3_driver_shared *vmx_ds;
253 	int vmx_flags;
254 #define VMXNET3_FLAG_NO_MSIX	(1 << 0)
255 #define VMXNET3_FLAG_RSS	(1 << 1)
256 #define VMXNET3_FLAG_ATTACHED	(1 << 2)
257 
258 	struct vmxnet3_queue *vmx_queue;
259 
260 	struct pci_attach_args *vmx_pa;
261 	pci_chipset_tag_t vmx_pc;
262 
263 	bus_space_tag_t vmx_iot0;
264 	bus_space_tag_t vmx_iot1;
265 	bus_space_handle_t vmx_ioh0;
266 	bus_space_handle_t vmx_ioh1;
267 	bus_size_t vmx_ios0;
268 	bus_size_t vmx_ios1;
269 	bus_dma_tag_t vmx_dmat;
270 
271 	int vmx_link_active;
272 	int vmx_ntxqueues;
273 	int vmx_nrxqueues;
274 	int vmx_ntxdescs;
275 	int vmx_nrxdescs;
276 	int vmx_max_rxsegs;
277 
278 	struct evcnt vmx_event_intr;
279 	struct evcnt vmx_event_link;
280 	struct evcnt vmx_event_txqerror;
281 	struct evcnt vmx_event_rxqerror;
282 	struct evcnt vmx_event_dic;
283 	struct evcnt vmx_event_debug;
284 
285 	int vmx_intr_type;
286 	int vmx_intr_mask_mode;
287 	int vmx_event_intr_idx;
288 	int vmx_nintrs;
289 	pci_intr_handle_t *vmx_intrs;	/* legacy use vmx_intrs[0] */
290 	void *vmx_ihs[VMXNET3_MAX_INTRS];
291 
292 	kmutex_t *vmx_mtx;
293 
294 	uint8_t *vmx_mcast;
295 	void *vmx_qs;
296 	struct vmxnet3_rss_shared *vmx_rss;
297 	callout_t vmx_tick;
298 	struct vmxnet3_dma_alloc vmx_ds_dma;
299 	struct vmxnet3_dma_alloc vmx_qs_dma;
300 	struct vmxnet3_dma_alloc vmx_mcast_dma;
301 	struct vmxnet3_dma_alloc vmx_rss_dma;
302 	int vmx_max_ntxqueues;
303 	int vmx_max_nrxqueues;
304 	uint8_t vmx_lladdr[ETHER_ADDR_LEN];
305 
306 	u_int vmx_rx_intr_process_limit;
307 	u_int vmx_tx_intr_process_limit;
308 	u_int vmx_rx_process_limit;
309 	u_int vmx_tx_process_limit;
310 	struct sysctllog *vmx_sysctllog;
311 
312 	bool vmx_txrx_workqueue;
313 	struct workqueue *vmx_queue_wq;
314 };
315 
316 #define VMXNET3_STAT
317 
318 #ifdef VMXNET3_STAT
319 struct {
320 	u_int txhead;
321 	u_int txdone;
322 	u_int maxtxlen;
323 	u_int rxdone;
324 	u_int rxfill;
325 	u_int intr;
326 } vmxstat;
327 #endif
328 
329 typedef enum {
330 	VMXNET3_BARRIER_RD,
331 	VMXNET3_BARRIER_WR,
332 } vmxnet3_barrier_t;
333 
334 #define JUMBO_LEN (MCLBYTES - ETHER_ALIGN)	/* XXX */
335 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
336 
337 #define vtophys(va) 0		/* XXX ok? */
338 
339 static int vmxnet3_match(device_t, cfdata_t, void *);
340 static void vmxnet3_attach(device_t, device_t, void *);
341 static int vmxnet3_detach(device_t, int);
342 
343 static int vmxnet3_alloc_pci_resources(struct vmxnet3_softc *);
344 static void vmxnet3_free_pci_resources(struct vmxnet3_softc *);
345 static int vmxnet3_check_version(struct vmxnet3_softc *);
346 static void vmxnet3_check_multiqueue(struct vmxnet3_softc *);
347 
348 static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
349 static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
350 static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
351 static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
352 static void vmxnet3_free_interrupts(struct vmxnet3_softc *);
353 
354 static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
355 static int vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *);
356 static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
357 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
358 static int vmxnet3_setup_interrupts(struct vmxnet3_softc *);
359 static int vmxnet3_setup_sysctl(struct vmxnet3_softc *);
360 
361 static int vmxnet3_setup_stats(struct vmxnet3_softc *);
362 static void vmxnet3_teardown_stats(struct vmxnet3_softc *);
363 
364 static int vmxnet3_init_rxq(struct vmxnet3_softc *, int);
365 static int vmxnet3_init_txq(struct vmxnet3_softc *, int);
366 static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
367 static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
368 static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
369 static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
370 
371 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
372 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
373 static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
374 static void vmxnet3_free_txq_data(struct vmxnet3_softc *);
375 static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
376 static void vmxnet3_free_rxq_data(struct vmxnet3_softc *);
377 static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
378 static void vmxnet3_free_queue_data(struct vmxnet3_softc *);
379 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
380 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
381 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
382 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
383 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
384 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
385 static void vmxnet3_free_data(struct vmxnet3_softc *);
386 static int vmxnet3_setup_interface(struct vmxnet3_softc *);
387 
388 static void vmxnet3_evintr(struct vmxnet3_softc *);
389 static bool vmxnet3_txq_eof(struct vmxnet3_txqueue *, u_int);
390 static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxqueue *,
391     struct vmxnet3_rxring *);
392 static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
393     struct vmxnet3_rxring *, int);
394 static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *);
395 static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
396 static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *,
397     struct vmxnet3_rxcompdesc *, struct mbuf *);
398 static bool vmxnet3_rxq_eof(struct vmxnet3_rxqueue *, u_int);
399 static int vmxnet3_legacy_intr(void *);
400 static int vmxnet3_txrxq_intr(void *);
401 static void vmxnet3_handle_queue(void *);
402 static void vmxnet3_handle_queue_work(struct work *, void *);
403 static int vmxnet3_event_intr(void *);
404 
405 static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
406 static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
407 static void vmxnet3_stop_locked(struct vmxnet3_softc *);
408 static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *);
409 static void vmxnet3_stop(struct ifnet *, int);
410 
411 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
412 static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
413 static int vmxnet3_reinit_queues(struct vmxnet3_softc *);
414 static int vmxnet3_enable_device(struct vmxnet3_softc *);
415 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
416 static int vmxnet3_reinit(struct vmxnet3_softc *);
417 
418 static int vmxnet3_init_locked(struct vmxnet3_softc *);
419 static int vmxnet3_init(struct ifnet *);
420 
421 static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *, int *, int *);
422 static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t);
423 static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
424 static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
425 static void vmxnet3_start_locked(struct ifnet *);
426 static void vmxnet3_start(struct ifnet *);
427 static void vmxnet3_transmit_locked(struct ifnet *, struct vmxnet3_txqueue *);
428 static int vmxnet3_transmit(struct ifnet *, struct mbuf *);
429 static void vmxnet3_deferred_transmit(void *);
430 
431 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *);
432 static int vmxnet3_ioctl(struct ifnet *, u_long, void *);
433 static int vmxnet3_ifflags_cb(struct ethercom *);
434 
435 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
436 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
437 static void vmxnet3_tick(void *);
438 static void vmxnet3_if_link_status(struct vmxnet3_softc *);
439 static bool vmxnet3_cmd_link_status(struct ifnet *);
440 static void vmxnet3_ifmedia_status(struct ifnet *, struct ifmediareq *);
441 static int vmxnet3_ifmedia_change(struct ifnet *);
442 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
443 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
444 
445 static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
446 static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
447 
448 static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t,
449     struct vmxnet3_dma_alloc *);
450 static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *);
451 
452 CFATTACH_DECL3_NEW(vmx, sizeof(struct vmxnet3_softc),
453     vmxnet3_match, vmxnet3_attach, vmxnet3_detach, NULL, NULL, NULL, 0);
454 
455 /* round down to the nearest power of 2 */
456 static int
457 vmxnet3_calc_queue_size(int n)
458 {
459 
460 	if (__predict_false(n <= 0))
461 		return 1;
462 
463 	return (1U << (fls32(n) - 1));
464 }
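/*
 * Worked example (a sketch): for n = 6, fls32(6) is 3, so the result is
 * 1 << 2 == 4; an exact power of two such as 8 maps to itself.
 */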
465 
466 static inline void
467 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
468 {
469 
470 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
471 }
472 
473 static inline uint32_t
474 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
475 {
476 
477 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
478 }
479 
480 static inline void
481 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
482 {
483 
484 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
485 }
486 
487 static inline void
488 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
489 {
490 
491 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
492 }
493 
494 static inline uint32_t
495 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
496 {
497 
498 	vmxnet3_write_cmd(sc, cmd);
499 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
500 }
501 
502 static inline void
503 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
504 {
505 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
506 }
507 
508 static inline void
509 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
510 {
511 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
512 }
513 
514 static inline void
515 vmxnet3_rxr_increment_fill(struct vmxnet3_rxring *rxr)
516 {
517 
518 	if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
519 		rxr->vxrxr_fill = 0;
520 		rxr->vxrxr_gen ^= 1;
521 	}
522 }
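/*
 * The fill index wraps modulo the ring size, and vxrxr_gen is toggled on
 * each wrap so the device can distinguish descriptors produced in this
 * pass from stale ones of the previous pass (the usual vmxnet3
 * generation-bit scheme).
 */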
523 
524 static inline int
525 vmxnet3_txring_avail(struct vmxnet3_txring *txr)
526 {
527 	int avail = txr->vxtxr_next - txr->vxtxr_head - 1;
528 	return (avail < 0 ? (int)txr->vxtxr_ndesc + avail : avail);
529 }
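/*
 * Worked example (a sketch): with vxtxr_ndesc = 512, vxtxr_head = 10 and
 * vxtxr_next = 4, avail is 4 - 10 - 1 = -7, so 512 - 7 = 505 is returned:
 * six descriptors (indices 4..9) are still outstanding and one slot is
 * kept unused so a full ring can be told apart from an empty one.
 */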
530 
531 /*
532  * Since this is a purely paravirtualized device, we do not have
533  * to worry about DMA coherency. But at times, we must make sure
534  * both the compiler and CPU do not reorder memory operations.
535  */
536 static inline void
537 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
538 {
539 
540 	switch (type) {
541 	case VMXNET3_BARRIER_RD:
542 		membar_consumer();
543 		break;
544 	case VMXNET3_BARRIER_WR:
545 		membar_producer();
546 		break;
547 	default:
548 		panic("%s: bad barrier type %d", __func__, type);
549 	}
550 }
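/*
 * Typical use (a sketch): a VMXNET3_BARRIER_WR is issued after filling in
 * a descriptor and before exposing it to the device (e.g. by flipping its
 * generation bit), so the hypervisor never sees a valid generation paired
 * with stale descriptor contents.
 */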
551 
552 static int
553 vmxnet3_match(device_t parent, cfdata_t match, void *aux)
554 {
555 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
556 
557 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_VMWARE &&
558 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VMWARE_VMXNET3)
559 		return 1;
560 
561 	return 0;
562 }
563 
564 static void
565 vmxnet3_attach(device_t parent, device_t self, void *aux)
566 {
567 	struct vmxnet3_softc *sc = device_private(self);
568 	struct pci_attach_args *pa = aux;
569 	pcireg_t preg;
570 	int error;
571 	int candidate;
572 
573 	sc->vmx_dev = self;
574 	sc->vmx_pa = pa;
575 	sc->vmx_pc = pa->pa_pc;
576 	if (pci_dma64_available(pa))
577 		sc->vmx_dmat = pa->pa_dmat64;
578 	else
579 		sc->vmx_dmat = pa->pa_dmat;
580 
581 	pci_aprint_devinfo_fancy(pa, "Ethernet controller", "vmxnet3", 1);
582 
583 	preg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
584 	preg |= PCI_COMMAND_MASTER_ENABLE;
585 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, preg);
586 
587 	sc->vmx_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
588 	callout_init(&sc->vmx_tick, CALLOUT_MPSAFE);
589 
590 	candidate = MIN(MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES),
591 	    ncpu);
592 	sc->vmx_max_ntxqueues = sc->vmx_max_nrxqueues =
593 	    vmxnet3_calc_queue_size(candidate);
594 	sc->vmx_ntxdescs = 512;
595 	sc->vmx_nrxdescs = 256;
596 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
597 
598 	error = vmxnet3_alloc_pci_resources(sc);
599 	if (error)
600 		return;
601 
602 	error = vmxnet3_check_version(sc);
603 	if (error)
604 		return;
605 
606 	error = vmxnet3_alloc_rxtx_queues(sc);
607 	if (error)
608 		return;
609 
610 	error = vmxnet3_alloc_interrupts(sc);
611 	if (error)
612 		return;
613 
614 	vmxnet3_check_multiqueue(sc);
615 
616 	error = vmxnet3_alloc_data(sc);
617 	if (error)
618 		return;
619 
620 	error = vmxnet3_setup_interface(sc);
621 	if (error)
622 		return;
623 
624 	error = vmxnet3_setup_interrupts(sc);
625 	if (error)
626 		return;
627 
628 	error = vmxnet3_setup_sysctl(sc);
629 	if (error)
630 		return;
631 
632 	error = vmxnet3_setup_stats(sc);
633 	if (error)
634 		return;
635 
636 	sc->vmx_flags |= VMXNET3_FLAG_ATTACHED;
637 }
638 
639 static int
640 vmxnet3_detach(device_t self, int flags)
641 {
642 	struct vmxnet3_softc *sc;
643 	struct ifnet *ifp;
644 
645 	sc = device_private(self);
646 	ifp = &sc->vmx_ethercom.ec_if;
647 
648 	if (sc->vmx_flags & VMXNET3_FLAG_ATTACHED) {
649 		VMXNET3_CORE_LOCK(sc);
650 		vmxnet3_stop_locked(sc);
651 		callout_halt(&sc->vmx_tick, sc->vmx_mtx);
652 		callout_destroy(&sc->vmx_tick);
653 		VMXNET3_CORE_UNLOCK(sc);
654 
655 		ether_ifdetach(ifp);
656 		if_detach(ifp);
657 		ifmedia_fini(&sc->vmx_media);
658 	}
659 
660 	vmxnet3_teardown_stats(sc);
661 	sysctl_teardown(&sc->vmx_sysctllog);
662 
663 	vmxnet3_free_interrupts(sc);
664 
665 	vmxnet3_free_data(sc);
666 	vmxnet3_free_pci_resources(sc);
667 	vmxnet3_free_rxtx_queues(sc);
668 
669 	if (sc->vmx_mtx)
670 		mutex_obj_free(sc->vmx_mtx);
671 
672 	return (0);
673 }
674 
675 static int
676 vmxnet3_alloc_pci_resources(struct vmxnet3_softc *sc)
677 {
678 	struct pci_attach_args *pa = sc->vmx_pa;
679 	pcireg_t memtype;
680 
681 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
682 	if (pci_mapreg_map(pa, PCI_BAR(0), memtype, 0, &sc->vmx_iot0, &sc->vmx_ioh0,
683 	    NULL, &sc->vmx_ios0)) {
684 		aprint_error_dev(sc->vmx_dev, "failed to map BAR0\n");
685 		return (ENXIO);
686 	}
687 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(1));
688 	if (pci_mapreg_map(pa, PCI_BAR(1), memtype, 0, &sc->vmx_iot1, &sc->vmx_ioh1,
689 	    NULL, &sc->vmx_ios1)) {
690 		aprint_error_dev(sc->vmx_dev, "failed to map BAR1\n");
691 		return (ENXIO);
692 	}
693 
694 	if (!pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, NULL, NULL)) {
695 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
696 		return (0);
697 	}
698 
699 	return (0);
700 }
701 
702 static void
703 vmxnet3_free_pci_resources(struct vmxnet3_softc *sc)
704 {
705 
706 	if (sc->vmx_ios0) {
707 		bus_space_unmap(sc->vmx_iot0, sc->vmx_ioh0, sc->vmx_ios0);
708 		sc->vmx_ios0 = 0;
709 	}
710 
711 	if (sc->vmx_ios1) {
712 		bus_space_unmap(sc->vmx_iot1, sc->vmx_ioh1, sc->vmx_ios1);
713 		sc->vmx_ios1 = 0;
714 	}
715 }
716 
717 static int
718 vmxnet3_check_version(struct vmxnet3_softc *sc)
719 {
720 	u_int ver;
721 
722 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
723 	if ((ver & 0x1) == 0) {
724 		aprint_error_dev(sc->vmx_dev,
725 		    "unsupported hardware version 0x%x\n", ver);
726 		return (ENOTSUP);
727 	}
728 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
729 
730 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
731 	if ((ver & 0x1) == 0) {
732 		aprint_error_dev(sc->vmx_dev,
733 		    "incompatible UPT version 0x%x\n", ver);
734 		return (ENOTSUP);
735 	}
736 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
737 
738 	return (0);
739 }
740 
741 static void
742 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
743 {
744 
745 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
746 		goto out;
747 
748 	/* Just use the maximum configured for now. */
749 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
750 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
751 
752 	if (sc->vmx_nrxqueues > 1)
753 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
754 
755 	return;
756 
757 out:
758 	sc->vmx_ntxqueues = 1;
759 	sc->vmx_nrxqueues = 1;
760 }
761 
762 static int
763 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
764 {
765 	int required;
766 	struct pci_attach_args *pa = sc->vmx_pa;
767 
768 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
769 		return (1);
770 
771 	/* Allocate an additional vector for the events interrupt. */
772 	required = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues) + 1;
773 
774 	if (pci_msix_count(pa->pa_pc, pa->pa_tag) < required)
775 		return (1);
776 
777 	if (pci_msix_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
778 		sc->vmx_nintrs = required;
779 		return (0);
780 	}
781 
782 	return (1);
783 }
784 
785 static int
786 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
787 {
788 	int nmsi, required;
789 	struct pci_attach_args *pa = sc->vmx_pa;
790 
791 	required = 1;
792 
793 	nmsi = pci_msi_count(pa->pa_pc, pa->pa_tag);
794 	if (nmsi < required)
795 		return (1);
796 
797 	if (pci_msi_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
798 		sc->vmx_nintrs = required;
799 		return (0);
800 	}
801 
802 	return (1);
803 }
804 
805 static int
806 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
807 {
808 
809 	if (pci_intx_alloc(sc->vmx_pa, &sc->vmx_intrs) == 0) {
810 		sc->vmx_nintrs = 1;
811 		return (0);
812 	}
813 
814 	return (1);
815 }
816 
817 static int
818 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
819 {
820 	u_int config;
821 	int error;
822 
823 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
824 
825 	sc->vmx_intr_type = config & 0x03;
826 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
827 
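	/*
	 * Try interrupt types in decreasing order of preference: MSI-X,
	 * then MSI, then the legacy INTx line.  Each case below falls
	 * through to the next type when its allocation fails.
	 */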
828 	switch (sc->vmx_intr_type) {
829 	case VMXNET3_IT_AUTO:
830 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
831 		/* FALLTHROUGH */
832 	case VMXNET3_IT_MSIX:
833 		error = vmxnet3_alloc_msix_interrupts(sc);
834 		if (error == 0)
835 			break;
836 		sc->vmx_intr_type = VMXNET3_IT_MSI;
837 		/* FALLTHROUGH */
838 	case VMXNET3_IT_MSI:
839 		error = vmxnet3_alloc_msi_interrupts(sc);
840 		if (error == 0)
841 			break;
842 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
843 		/* FALLTHROUGH */
844 	case VMXNET3_IT_LEGACY:
845 		error = vmxnet3_alloc_legacy_interrupts(sc);
846 		if (error == 0)
847 			break;
848 		/* FALLTHROUGH */
849 	default:
850 		sc->vmx_intr_type = -1;
851 		aprint_error_dev(sc->vmx_dev, "cannot allocate any interrupt resources\n");
852 		return (ENXIO);
853 	}
854 
855 	return (error);
856 }
857 
858 static void
859 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
860 {
861 	pci_chipset_tag_t pc = sc->vmx_pc;
862 	int i;
863 
864 	workqueue_destroy(sc->vmx_queue_wq);
865 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
866 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
867 
868 		softint_disestablish(vmxq->vxq_si);
869 		vmxq->vxq_si = NULL;
870 	}
871 	for (i = 0; i < sc->vmx_nintrs; i++) {
872 		pci_intr_disestablish(pc, sc->vmx_ihs[i]);
873 	}
874 	pci_intr_release(pc, sc->vmx_intrs, sc->vmx_nintrs);
875 }
876 
877 static int
878 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
879 {
880 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
881 	struct vmxnet3_queue *vmxq;
882 	pci_intr_handle_t *intr;
883 	void **ihs;
884 	int intr_idx, i, use_queues, error;
885 	kcpuset_t *affinity;
886 	const char *intrstr;
887 	char intrbuf[PCI_INTRSTR_LEN];
888 	char xnamebuf[32];
889 
890 	intr = sc->vmx_intrs;
891 	intr_idx = 0;
892 	ihs = sc->vmx_ihs;
893 
894 	/* See vmxnet3_alloc_msix_interrupts() */
895 	use_queues = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
896 	for (i = 0; i < use_queues; i++, intr++, ihs++, intr_idx++) {
897 		snprintf(xnamebuf, 32, "%s: txrx %d", device_xname(sc->vmx_dev), i);
898 
899 		vmxq = &sc->vmx_queue[i];
900 
901 		intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
902 
903 		pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
904 		*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
905 		    vmxnet3_txrxq_intr, vmxq, xnamebuf);
906 		if (*ihs == NULL) {
907 			aprint_error_dev(sc->vmx_dev,
908 			    "unable to establish txrx interrupt at %s\n", intrstr);
909 			return (-1);
910 		}
911 		aprint_normal_dev(sc->vmx_dev, "txrx interrupting at %s\n", intrstr);
912 
913 		kcpuset_create(&affinity, true);
914 		kcpuset_set(affinity, intr_idx % ncpu);
915 		error = interrupt_distribute(*ihs, affinity, NULL);
916 		if (error) {
917 			aprint_normal_dev(sc->vmx_dev,
918 			    "unable to change affinity of %s, using default CPU\n",
919 			    intrstr);
920 		}
921 		kcpuset_destroy(affinity);
922 
923 		vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
924 		    vmxnet3_handle_queue, vmxq);
925 		if (vmxq->vxq_si == NULL) {
926 			aprint_error_dev(sc->vmx_dev,
927 			    "softint_establish for vxq_si failed\n");
928 			return (-1);
929 		}
930 
931 		vmxq->vxq_intr_idx = intr_idx;
932 	}
933 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(sc->vmx_dev));
934 	error = workqueue_create(&sc->vmx_queue_wq, xnamebuf,
935 	    vmxnet3_handle_queue_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_NET,
936 	    WQ_PERCPU | WQ_MPSAFE);
937 	if (error) {
938 		aprint_error_dev(sc->vmx_dev, "workqueue_create failed\n");
939 		return (-1);
940 	}
941 	sc->vmx_txrx_workqueue = false;
942 
943 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
944 
945 	snprintf(xnamebuf, 32, "%s: link", device_xname(sc->vmx_dev));
946 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
947 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
948 	    vmxnet3_event_intr, sc, xnamebuf);
949 	if (*ihs == NULL) {
950 		aprint_error_dev(sc->vmx_dev,
951 		    "unable to establish event interrupt at %s\n", intrstr);
952 		return (-1);
953 	}
954 	aprint_normal_dev(sc->vmx_dev, "event interrupting at %s\n", intrstr);
955 
956 	sc->vmx_event_intr_idx = intr_idx;
957 
958 	return (0);
959 }
960 
961 static int
962 vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *sc)
963 {
964 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
965 	pci_intr_handle_t *intr;
966 	void **ihs;
967 	struct vmxnet3_queue *vmxq;
968 	int i;
969 	const char *intrstr;
970 	char intrbuf[PCI_INTRSTR_LEN];
971 	char xnamebuf[32];
972 
973 	intr = &sc->vmx_intrs[0];
974 	ihs = sc->vmx_ihs;
975 	vmxq = &sc->vmx_queue[0];
976 
977 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
978 
979 	snprintf(xnamebuf, 32, "%s: msi", device_xname(sc->vmx_dev));
980 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
981 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
982 	    vmxnet3_legacy_intr, sc, xnamebuf);
983 	if (*ihs == NULL) {
984 		aprint_error_dev(sc->vmx_dev,
985 		    "unable to establish interrupt at %s\n", intrstr);
986 		return (-1);
987 	}
988 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
989 
990 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
991 	    vmxnet3_handle_queue, vmxq);
992 	if (vmxq->vxq_si == NULL) {
993 		aprint_error_dev(sc->vmx_dev,
994 		    "softint_establish for vxq_si failed\n");
995 		return (-1);
996 	}
997 
998 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
999 		sc->vmx_queue[i].vxq_intr_idx = 0;
1000 	sc->vmx_event_intr_idx = 0;
1001 
1002 	return (0);
1003 }
1004 
1005 static int
1006 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
1007 {
1008 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
1009 	pci_intr_handle_t *intr;
1010 	void **ihs;
1011 	struct vmxnet3_queue *vmxq;
1012 	int i;
1013 	const char *intrstr;
1014 	char intrbuf[PCI_INTRSTR_LEN];
1015 	char xnamebuf[32];
1016 
1017 	intr = &sc->vmx_intrs[0];
1018 	ihs = sc->vmx_ihs;
1019 	vmxq = &sc->vmx_queue[0];
1020 
1021 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1022 
1023 	snprintf(xnamebuf, 32, "%s:legacy", device_xname(sc->vmx_dev));
1024 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1025 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1026 	    vmxnet3_legacy_intr, sc, xnamebuf);
1027 	if (*ihs == NULL) {
1028 		aprint_error_dev(sc->vmx_dev,
1029 		    "unable to establish interrupt at %s\n", intrstr);
1030 		return (-1);
1031 	}
1032 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1033 
1034 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1035 	    vmxnet3_handle_queue, vmxq);
1036 	if (vmxq->vxq_si == NULL) {
1037 		aprint_error_dev(sc->vmx_dev,
1038 		    "softint_establish for vxq_si failed\n");
1039 		return (-1);
1040 	}
1041 
1042 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
1043 		sc->vmx_queue[i].vxq_intr_idx = 0;
1044 	sc->vmx_event_intr_idx = 0;
1045 
1046 	return (0);
1047 }
1048 
1049 static void
1050 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
1051 {
1052 	struct vmxnet3_queue *vmxq;
1053 	struct vmxnet3_txqueue *txq;
1054 	struct vmxnet3_txq_shared *txs;
1055 	struct vmxnet3_rxqueue *rxq;
1056 	struct vmxnet3_rxq_shared *rxs;
1057 	int i;
1058 
1059 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
1060 
1061 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1062 		vmxq = &sc->vmx_queue[i];
1063 		txq = &vmxq->vxq_txqueue;
1064 		txs = txq->vxtxq_ts;
1065 		txs->intr_idx = vmxq->vxq_intr_idx;
1066 	}
1067 
1068 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1069 		vmxq = &sc->vmx_queue[i];
1070 		rxq = &vmxq->vxq_rxqueue;
1071 		rxs = rxq->vxrxq_rs;
1072 		rxs->intr_idx = vmxq->vxq_intr_idx;
1073 	}
1074 }
1075 
1076 static int
1077 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
1078 {
1079 	int error;
1080 
1081 	switch (sc->vmx_intr_type) {
1082 	case VMXNET3_IT_MSIX:
1083 		error = vmxnet3_setup_msix_interrupts(sc);
1084 		break;
1085 	case VMXNET3_IT_MSI:
1086 		error = vmxnet3_setup_msi_interrupt(sc);
1087 		break;
1088 	case VMXNET3_IT_LEGACY:
1089 		error = vmxnet3_setup_legacy_interrupt(sc);
1090 		break;
1091 	default:
1092 		panic("%s: invalid interrupt type %d", __func__,
1093 		    sc->vmx_intr_type);
1094 	}
1095 
1096 	if (error == 0)
1097 		vmxnet3_set_interrupt_idx(sc);
1098 
1099 	return (error);
1100 }
1101 
1102 static int
1103 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1104 {
1105 	struct vmxnet3_rxqueue *rxq;
1106 	struct vmxnet3_rxring *rxr;
1107 	int i;
1108 
1109 	rxq = &sc->vmx_queue[q].vxq_rxqueue;
1110 
1111 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1112 	    device_xname(sc->vmx_dev), q);
1113 	rxq->vxrxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1114 
1115 	rxq->vxrxq_sc = sc;
1116 
1117 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1118 		rxr = &rxq->vxrxq_cmd_ring[i];
1119 		rxr->vxrxr_rid = i;
1120 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1121 		rxr->vxrxr_rxbuf = kmem_zalloc(rxr->vxrxr_ndesc *
1122 		    sizeof(struct vmxnet3_rxbuf), KM_SLEEP);
1123 
1124 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1125 	}
1126 
1127 	return (0);
1128 }
1129 
1130 static int
1131 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1132 {
1133 	struct vmxnet3_txqueue *txq;
1134 	struct vmxnet3_txring *txr;
1135 
1136 	txq = &sc->vmx_queue[q].vxq_txqueue;
1137 	txr = &txq->vxtxq_cmd_ring;
1138 
1139 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1140 	    device_xname(sc->vmx_dev), q);
1141 	txq->vxtxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1142 
1143 	txq->vxtxq_sc = sc;
1144 
1145 	txq->vxtxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1146 	    vmxnet3_deferred_transmit, txq);
1147 	if (txq->vxtxq_si == NULL) {
1148 		mutex_obj_free(txq->vxtxq_mtx);
1149 		aprint_error_dev(sc->vmx_dev,
1150 		    "softint_establish for vxtxq_si failed\n");
1151 		return ENOMEM;
1152 	}
1153 
1154 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1155 	txr->vxtxr_txbuf = kmem_zalloc(txr->vxtxr_ndesc *
1156 	    sizeof(struct vmxnet3_txbuf), KM_SLEEP);
1157 
1158 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1159 
1160 	txq->vxtxq_interq = pcq_create(sc->vmx_ntxdescs, KM_SLEEP);
1161 
1162 	return (0);
1163 }
1164 
1165 static int
1166 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1167 {
1168 	int i, error, max_nqueues;
1169 
1170 	KASSERT(!cpu_intr_p());
1171 	KASSERT(!cpu_softintr_p());
1172 
1173 	/*
1174 	 * Only attempt to create multiple queues if MSIX is available.
1175 	 * This check prevents us from allocating queue structures that
1176 	 * we will not use.
1177 	 *
1178 	 * FreeBSD:
1179 	 * MSIX is disabled by default because its apparently broken for
1180 	 * devices passed through by at least ESXi 5.1.
1181 	 * The hw.pci.honor_msi_blacklist tunable must be set to zero for MSIX.
1182 	 */
1183 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1184 		sc->vmx_max_nrxqueues = 1;
1185 		sc->vmx_max_ntxqueues = 1;
1186 	}
1187 
1188 	max_nqueues = MAX(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
1189 	sc->vmx_queue = kmem_zalloc(sizeof(struct vmxnet3_queue) * max_nqueues,
1190 	    KM_SLEEP);
1191 
1192 	for (i = 0; i < max_nqueues; i++) {
1193 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
1194 		vmxq->vxq_id = i;
1195 	}
1196 
1197 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1198 		error = vmxnet3_init_rxq(sc, i);
1199 		if (error)
1200 			return (error);
1201 	}
1202 
1203 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1204 		error = vmxnet3_init_txq(sc, i);
1205 		if (error)
1206 			return (error);
1207 	}
1208 
1209 	return (0);
1210 }
1211 
1212 static void
1213 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1214 {
1215 	struct vmxnet3_rxring *rxr;
1216 	int i;
1217 
1218 	rxq->vxrxq_sc = NULL;
1219 
1220 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1221 		rxr = &rxq->vxrxq_cmd_ring[i];
1222 
1223 		if (rxr->vxrxr_rxbuf != NULL) {
1224 			kmem_free(rxr->vxrxr_rxbuf,
1225 			    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf));
1226 			rxr->vxrxr_rxbuf = NULL;
1227 		}
1228 	}
1229 
1230 	if (rxq->vxrxq_mtx != NULL)
1231 		mutex_obj_free(rxq->vxrxq_mtx);
1232 }
1233 
1234 static void
1235 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1236 {
1237 	struct vmxnet3_txring *txr;
1238 	struct mbuf *m;
1239 
1240 	txr = &txq->vxtxq_cmd_ring;
1241 
1242 	txq->vxtxq_sc = NULL;
1243 
1244 	softint_disestablish(txq->vxtxq_si);
1245 
1246 	while ((m = pcq_get(txq->vxtxq_interq)) != NULL)
1247 		m_freem(m);
1248 	pcq_destroy(txq->vxtxq_interq);
1249 
1250 	if (txr->vxtxr_txbuf != NULL) {
1251 		kmem_free(txr->vxtxr_txbuf,
1252 		    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf));
1253 		txr->vxtxr_txbuf = NULL;
1254 	}
1255 
1256 	if (txq->vxtxq_mtx != NULL)
1257 		mutex_obj_free(txq->vxtxq_mtx);
1258 }
1259 
1260 static void
1261 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1262 {
1263 	int i;
1264 
1265 	if (sc->vmx_queue != NULL) {
1266 		int max_nqueues;
1267 
1268 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1269 			vmxnet3_destroy_rxq(&sc->vmx_queue[i].vxq_rxqueue);
1270 
1271 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1272 			vmxnet3_destroy_txq(&sc->vmx_queue[i].vxq_txqueue);
1273 
1274 		max_nqueues = MAX(sc->vmx_max_nrxqueues, sc->vmx_max_ntxqueues);
1275 		kmem_free(sc->vmx_queue,
1276 		    sizeof(struct vmxnet3_queue) * max_nqueues);
1277 	}
1278 }
1279 
1280 static int
1281 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1282 {
1283 	device_t dev;
1284 	uint8_t *kva;
1285 	size_t size;
1286 	int i, error;
1287 
1288 	dev = sc->vmx_dev;
1289 
1290 	size = sizeof(struct vmxnet3_driver_shared);
1291 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1292 	if (error) {
1293 		device_printf(dev, "cannot alloc shared memory\n");
1294 		return (error);
1295 	}
1296 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1297 
1298 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1299 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1300 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1301 	if (error) {
1302 		device_printf(dev, "cannot alloc queue shared memory\n");
1303 		return (error);
1304 	}
1305 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1306 	kva = sc->vmx_qs;
1307 
1308 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1309 		sc->vmx_queue[i].vxq_txqueue.vxtxq_ts =
1310 		    (struct vmxnet3_txq_shared *) kva;
1311 		kva += sizeof(struct vmxnet3_txq_shared);
1312 	}
1313 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1314 		sc->vmx_queue[i].vxq_rxqueue.vxrxq_rs =
1315 		    (struct vmxnet3_rxq_shared *) kva;
1316 		kva += sizeof(struct vmxnet3_rxq_shared);
1317 	}
1318 
1319 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1320 		size = sizeof(struct vmxnet3_rss_shared);
1321 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1322 		if (error) {
1323 			device_printf(dev, "cannot alloc rss shared memory\n");
1324 			return (error);
1325 		}
1326 		sc->vmx_rss =
1327 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1328 	}
1329 
1330 	return (0);
1331 }
1332 
1333 static void
1334 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1335 {
1336 
1337 	if (sc->vmx_rss != NULL) {
1338 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1339 		sc->vmx_rss = NULL;
1340 	}
1341 
1342 	if (sc->vmx_qs != NULL) {
1343 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1344 		sc->vmx_qs = NULL;
1345 	}
1346 
1347 	if (sc->vmx_ds != NULL) {
1348 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1349 		sc->vmx_ds = NULL;
1350 	}
1351 }
1352 
1353 static int
1354 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1355 {
1356 	device_t dev;
1357 	struct vmxnet3_txqueue *txq;
1358 	struct vmxnet3_txring *txr;
1359 	struct vmxnet3_comp_ring *txc;
1360 	size_t descsz, compsz;
1361 	u_int i;
1362 	int q, error;
1363 
1364 	dev = sc->vmx_dev;
1365 
1366 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1367 		txq = &sc->vmx_queue[q].vxq_txqueue;
1368 		txr = &txq->vxtxq_cmd_ring;
1369 		txc = &txq->vxtxq_comp_ring;
1370 
1371 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1372 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1373 
1374 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1375 		if (error) {
1376 			device_printf(dev, "cannot alloc Tx descriptors for "
1377 			    "queue %d error %d\n", q, error);
1378 			return (error);
1379 		}
1380 		txr->vxtxr_txd =
1381 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1382 
1383 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1384 		if (error) {
1385 			device_printf(dev, "cannot alloc Tx comp descriptors "
1386 			   "for queue %d error %d\n", q, error);
1387 			return (error);
1388 		}
1389 		txc->vxcr_u.txcd =
1390 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1391 
1392 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1393 			error = bus_dmamap_create(sc->vmx_dmat, VMXNET3_TX_MAXSIZE,
1394 			    VMXNET3_TX_MAXSEGS, VMXNET3_TX_MAXSEGSIZE, 0, BUS_DMA_NOWAIT,
1395 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1396 			if (error) {
1397 				device_printf(dev, "unable to create Tx buf "
1398 				    "dmamap for queue %d idx %d\n", q, i);
1399 				return (error);
1400 			}
1401 		}
1402 	}
1403 
1404 	return (0);
1405 }
1406 
1407 static void
1408 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1409 {
1410 	struct vmxnet3_txqueue *txq;
1411 	struct vmxnet3_txring *txr;
1412 	struct vmxnet3_comp_ring *txc;
1413 	struct vmxnet3_txbuf *txb;
1414 	u_int i;
1415 	int q;
1416 
1417 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1418 		txq = &sc->vmx_queue[q].vxq_txqueue;
1419 		txr = &txq->vxtxq_cmd_ring;
1420 		txc = &txq->vxtxq_comp_ring;
1421 
1422 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1423 			txb = &txr->vxtxr_txbuf[i];
1424 			if (txb->vtxb_dmamap != NULL) {
1425 				bus_dmamap_destroy(sc->vmx_dmat,
1426 				    txb->vtxb_dmamap);
1427 				txb->vtxb_dmamap = NULL;
1428 			}
1429 		}
1430 
1431 		if (txc->vxcr_u.txcd != NULL) {
1432 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1433 			txc->vxcr_u.txcd = NULL;
1434 		}
1435 
1436 		if (txr->vxtxr_txd != NULL) {
1437 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1438 			txr->vxtxr_txd = NULL;
1439 		}
1440 	}
1441 }
1442 
1443 static int
1444 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1445 {
1446 	device_t dev;
1447 	struct vmxnet3_rxqueue *rxq;
1448 	struct vmxnet3_rxring *rxr;
1449 	struct vmxnet3_comp_ring *rxc;
1450 	int descsz, compsz;
1451 	u_int i, j;
1452 	int q, error;
1453 
1454 	dev = sc->vmx_dev;
1455 
1456 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1457 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1458 		rxc = &rxq->vxrxq_comp_ring;
1459 		compsz = 0;
1460 
1461 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1462 			rxr = &rxq->vxrxq_cmd_ring[i];
1463 
1464 			descsz = rxr->vxrxr_ndesc *
1465 			    sizeof(struct vmxnet3_rxdesc);
1466 			compsz += rxr->vxrxr_ndesc *
1467 			    sizeof(struct vmxnet3_rxcompdesc);
1468 
1469 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1470 			    &rxr->vxrxr_dma);
1471 			if (error) {
1472 				device_printf(dev, "cannot allocate Rx "
1473 				    "descriptors for queue %d/%d error %d\n",
1474 				    i, q, error);
1475 				return (error);
1476 			}
1477 			rxr->vxrxr_rxd =
1478 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1479 		}
1480 
1481 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1482 		if (error) {
1483 			device_printf(dev, "cannot alloc Rx comp descriptors "
1484 			    "for queue %d error %d\n", q, error);
1485 			return (error);
1486 		}
1487 		rxc->vxcr_u.rxcd =
1488 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1489 
1490 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1491 			rxr = &rxq->vxrxq_cmd_ring[i];
1492 
1493 			error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1494 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1495 			    &rxr->vxrxr_spare_dmap);
1496 			if (error) {
1497 				device_printf(dev, "unable to create spare "
1498 				    "dmamap for queue %d/%d error %d\n",
1499 				    q, i, error);
1500 				return (error);
1501 			}
1502 
1503 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1504 				error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1505 				    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1506 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1507 				if (error) {
1508 					device_printf(dev, "unable to create "
1509 					    "dmamap for queue %d/%d slot %d "
1510 					    "error %d\n",
1511 					    q, i, j, error);
1512 					return (error);
1513 				}
1514 			}
1515 		}
1516 	}
1517 
1518 	return (0);
1519 }
1520 
1521 static void
1522 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1523 {
1524 	struct vmxnet3_rxqueue *rxq;
1525 	struct vmxnet3_rxring *rxr;
1526 	struct vmxnet3_comp_ring *rxc;
1527 	struct vmxnet3_rxbuf *rxb;
1528 	u_int i, j;
1529 	int q;
1530 
1531 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1532 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1533 		rxc = &rxq->vxrxq_comp_ring;
1534 
1535 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1536 			rxr = &rxq->vxrxq_cmd_ring[i];
1537 
1538 			if (rxr->vxrxr_spare_dmap != NULL) {
1539 				bus_dmamap_destroy(sc->vmx_dmat,
1540 				    rxr->vxrxr_spare_dmap);
1541 				rxr->vxrxr_spare_dmap = NULL;
1542 			}
1543 
1544 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1545 				rxb = &rxr->vxrxr_rxbuf[j];
1546 				if (rxb->vrxb_dmamap != NULL) {
1547 					bus_dmamap_destroy(sc->vmx_dmat,
1548 					    rxb->vrxb_dmamap);
1549 					rxb->vrxb_dmamap = NULL;
1550 				}
1551 			}
1552 		}
1553 
1554 		if (rxc->vxcr_u.rxcd != NULL) {
1555 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1556 			rxc->vxcr_u.rxcd = NULL;
1557 		}
1558 
1559 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1560 			rxr = &rxq->vxrxq_cmd_ring[i];
1561 
1562 			if (rxr->vxrxr_rxd != NULL) {
1563 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1564 				rxr->vxrxr_rxd = NULL;
1565 			}
1566 		}
1567 	}
1568 }
1569 
1570 static int
1571 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1572 {
1573 	int error;
1574 
1575 	error = vmxnet3_alloc_txq_data(sc);
1576 	if (error)
1577 		return (error);
1578 
1579 	error = vmxnet3_alloc_rxq_data(sc);
1580 	if (error)
1581 		return (error);
1582 
1583 	return (0);
1584 }
1585 
1586 static void
1587 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1588 {
1589 
1590 	if (sc->vmx_queue != NULL) {
1591 		vmxnet3_free_rxq_data(sc);
1592 		vmxnet3_free_txq_data(sc);
1593 	}
1594 }
1595 
1596 static int
1597 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1598 {
1599 	int error;
1600 
1601 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1602 	    32, &sc->vmx_mcast_dma);
1603 	if (error)
1604 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1605 	else
1606 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1607 
1608 	return (error);
1609 }
1610 
1611 static void
1612 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1613 {
1614 
1615 	if (sc->vmx_mcast != NULL) {
1616 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1617 		sc->vmx_mcast = NULL;
1618 	}
1619 }
1620 
1621 static void
1622 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1623 {
1624 	struct vmxnet3_driver_shared *ds;
1625 	struct vmxnet3_txqueue *txq;
1626 	struct vmxnet3_txq_shared *txs;
1627 	struct vmxnet3_rxqueue *rxq;
1628 	struct vmxnet3_rxq_shared *rxs;
1629 	int i;
1630 
1631 	ds = sc->vmx_ds;
1632 
1633 	/*
1634 	 * Initialize fields of the shared data that remain the same across
1635 	 * reinits. Note the shared data is zeroed when allocated.
1636 	 */
1637 
1638 	ds->magic = VMXNET3_REV1_MAGIC;
1639 
1640 	/* DriverInfo */
1641 	ds->version = VMXNET3_DRIVER_VERSION;
1642 	ds->guest = VMXNET3_GOS_FREEBSD |
1643 #ifdef __LP64__
1644 	    VMXNET3_GOS_64BIT;
1645 #else
1646 	    VMXNET3_GOS_32BIT;
1647 #endif
1648 	ds->vmxnet3_revision = 1;
1649 	ds->upt_version = 1;
1650 
1651 	/* Misc. conf */
1652 	ds->driver_data = vtophys(sc);
1653 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1654 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1655 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1656 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1657 
1658 	/* RSS conf */
1659 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1660 		ds->rss.version = 1;
1661 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1662 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1663 	}
1664 
1665 	/* Interrupt control. */
1666 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1667 	ds->nintr = sc->vmx_nintrs;
1668 	ds->evintr = sc->vmx_event_intr_idx;
1669 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1670 
1671 	for (i = 0; i < sc->vmx_nintrs; i++)
1672 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1673 
1674 	/* Receive filter. */
1675 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1676 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1677 
1678 	/* Tx queues */
1679 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1680 		txq = &sc->vmx_queue[i].vxq_txqueue;
1681 		txs = txq->vxtxq_ts;
1682 
1683 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1684 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1685 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1686 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1687 		txs->driver_data = vtophys(txq);
1688 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1689 	}
1690 
1691 	/* Rx queues */
1692 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1693 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
1694 		rxs = rxq->vxrxq_rs;
1695 
1696 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1697 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1698 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1699 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1700 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1701 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1702 		rxs->driver_data = vtophys(rxq);
1703 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1704 	}
1705 }
1706 
1707 static void
1708 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1709 {
1710 	/*
1711 	 * Use the same key as the Linux driver until FreeBSD can do
1712 	 * RSS (presumably Toeplitz) in software.
1713 	 */
1714 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1715 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1716 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1717 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1718 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1719 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1720 	};
1721 
1722 	struct vmxnet3_rss_shared *rss;
1723 	int i;
1724 
1725 	rss = sc->vmx_rss;
1726 
1727 	rss->hash_type =
1728 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1729 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1730 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1731 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1732 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1733 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1734 
1735 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1736 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1737 }
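/*
 * Example (a sketch): with vmx_nrxqueues == 4, the indirection table ends
 * up as 0,1,2,3,0,1,2,3,... so hashed flows are spread round-robin across
 * the Rx queues.
 */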
1738 
1739 static void
1740 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1741 {
1742 	struct ifnet *ifp;
1743 	struct vmxnet3_driver_shared *ds;
1744 
1745 	ifp = &sc->vmx_ethercom.ec_if;
1746 	ds = sc->vmx_ds;
1747 
1748 	ds->mtu = ifp->if_mtu;
1749 	ds->ntxqueue = sc->vmx_ntxqueues;
1750 	ds->nrxqueue = sc->vmx_nrxqueues;
1751 
1752 	ds->upt_features = 0;
1753 	if (ifp->if_capenable &
1754 	    (IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
1755 	    IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
1756 		ds->upt_features |= UPT1_F_CSUM;
1757 	if (sc->vmx_ethercom.ec_capenable & ETHERCAP_VLAN_HWTAGGING)
1758 		ds->upt_features |= UPT1_F_VLAN;
1759 
1760 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1761 		ds->upt_features |= UPT1_F_RSS;
1762 		vmxnet3_reinit_rss_shared_data(sc);
1763 	}
1764 
1765 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1766 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1767 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1768 }
1769 
1770 static int
1771 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1772 {
1773 	int error;
1774 
1775 	error = vmxnet3_alloc_shared_data(sc);
1776 	if (error)
1777 		return (error);
1778 
1779 	error = vmxnet3_alloc_queue_data(sc);
1780 	if (error)
1781 		return (error);
1782 
1783 	error = vmxnet3_alloc_mcast_table(sc);
1784 	if (error)
1785 		return (error);
1786 
1787 	vmxnet3_init_shared_data(sc);
1788 
1789 	return (0);
1790 }
1791 
1792 static void
1793 vmxnet3_free_data(struct vmxnet3_softc *sc)
1794 {
1795 
1796 	vmxnet3_free_mcast_table(sc);
1797 	vmxnet3_free_queue_data(sc);
1798 	vmxnet3_free_shared_data(sc);
1799 }
1800 
1801 static int
1802 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1803 {
1804 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
1805 
1806 	vmxnet3_get_lladdr(sc);
1807 	aprint_normal_dev(sc->vmx_dev, "Ethernet address %s\n",
1808 	    ether_sprintf(sc->vmx_lladdr));
1809 	vmxnet3_set_lladdr(sc);
1810 
1811 	strlcpy(ifp->if_xname, device_xname(sc->vmx_dev), IFNAMSIZ);
1812 	ifp->if_softc = sc;
1813 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
1814 	ifp->if_extflags = IFEF_MPSAFE;
1815 	ifp->if_ioctl = vmxnet3_ioctl;
1816 	ifp->if_start = vmxnet3_start;
1817 	ifp->if_transmit = vmxnet3_transmit;
1818 	ifp->if_watchdog = NULL;
1819 	ifp->if_init = vmxnet3_init;
1820 	ifp->if_stop = vmxnet3_stop;
1821 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_CSUM_IPv4_Rx |
1822 		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1823 		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1824 		    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1825 		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1826 
1827 	ifp->if_capenable = ifp->if_capabilities;
1828 
1829 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1830 
1831 	sc->vmx_ethercom.ec_capabilities |=
1832 	    ETHERCAP_VLAN_MTU | ETHERCAP_VLAN_HWTAGGING | ETHERCAP_JUMBO_MTU;
1833 	sc->vmx_ethercom.ec_capenable |= ETHERCAP_VLAN_HWTAGGING;
1834 
1835 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs);
1836 	IFQ_SET_READY(&ifp->if_snd);
1837 
1838 	/* Initialize ifmedia structures. */
1839 	sc->vmx_ethercom.ec_ifmedia = &sc->vmx_media;
1840 	ifmedia_init_with_lock(&sc->vmx_media, IFM_IMASK, vmxnet3_ifmedia_change,
1841 	    vmxnet3_ifmedia_status, sc->vmx_mtx);
1842 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1843 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
1844 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T, 0, NULL);
1845 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1846 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T, 0, NULL);
1847 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1848 
1849 	if_attach(ifp);
1850 	if_deferred_start_init(ifp, NULL);
1851 	ether_ifattach(ifp, sc->vmx_lladdr);
1852 	ether_set_ifflags_cb(&sc->vmx_ethercom, vmxnet3_ifflags_cb);
1853 	vmxnet3_cmd_link_status(ifp);
1854 
1855 	/* These limits should be set before interrupts are established. */
1856 	sc->vmx_rx_intr_process_limit = VMXNET3_RX_INTR_PROCESS_LIMIT;
1857 	sc->vmx_rx_process_limit = VMXNET3_RX_PROCESS_LIMIT;
1858 	sc->vmx_tx_intr_process_limit = VMXNET3_TX_INTR_PROCESS_LIMIT;
1859 	sc->vmx_tx_process_limit = VMXNET3_TX_PROCESS_LIMIT;
1860 
1861 	return (0);
1862 }
1863 
1864 static int
1865 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
1866 {
1867 	const char *devname;
1868 	struct sysctllog **log;
1869 	const struct sysctlnode *rnode, *rxnode, *txnode;
1870 	int error;
1871 
1872 	log = &sc->vmx_sysctllog;
1873 	devname = device_xname(sc->vmx_dev);
1874 
1875 	error = sysctl_createv(log, 0, NULL, &rnode,
1876 	    0, CTLTYPE_NODE, devname,
1877 	    SYSCTL_DESCR("vmxnet3 information and settings"),
1878 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
1879 	if (error)
1880 		goto out;
1881 	error = sysctl_createv(log, 0, &rnode, NULL,
1882 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
1883 	    SYSCTL_DESCR("Use workqueue for packet processing"),
1884 	    NULL, 0, &sc->vmx_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
1885 	if (error)
1886 		goto out;
1887 
1888 	error = sysctl_createv(log, 0, &rnode, &rxnode,
1889 	    0, CTLTYPE_NODE, "rx",
1890 	    SYSCTL_DESCR("vmxnet3 information and settings for Rx"),
1891 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1892 	if (error)
1893 		goto out;
1894 	error = sysctl_createv(log, 0, &rxnode, NULL,
1895 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1896 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
1897 	    NULL, 0, &sc->vmx_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1898 	if (error)
1899 		goto out;
1900 	error = sysctl_createv(log, 0, &rxnode, NULL,
1901 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1902 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1903 	    NULL, 0, &sc->vmx_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1904 	if (error)
1905 		goto out;
1906 
1907 	error = sysctl_createv(log, 0, &rnode, &txnode,
1908 	    0, CTLTYPE_NODE, "tx",
1909 	    SYSCTL_DESCR("vmxnet3 information and settings for Tx"),
1910 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1911 	if (error)
1912 		goto out;
1913 	error = sysctl_createv(log, 0, &txnode, NULL,
1914 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1915 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1916 	    NULL, 0, &sc->vmx_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1917 	if (error)
1918 		goto out;
1919 	error = sysctl_createv(log, 0, &txnode, NULL,
1920 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1921 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1922 	    NULL, 0, &sc->vmx_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1923 
1924 out:
1925 	if (error) {
1926 		aprint_error_dev(sc->vmx_dev,
1927 		    "unable to create sysctl node\n");
1928 		sysctl_teardown(log);
1929 	}
1930 	return error;
1931 }
1932 
1933 static int
1934 vmxnet3_setup_stats(struct vmxnet3_softc *sc)
1935 {
1936 	struct vmxnet3_queue *vmxq;
1937 	struct vmxnet3_txqueue *txq;
1938 	struct vmxnet3_rxqueue *rxq;
1939 	int i;
1940 
1941 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1942 		vmxq = &sc->vmx_queue[i];
1943 		txq = &vmxq->vxq_txqueue;
1944 		evcnt_attach_dynamic(&txq->vxtxq_intr, EVCNT_TYPE_INTR,
1945 		    NULL, txq->vxtxq_name, "Interrupt on queue");
1946 		evcnt_attach_dynamic(&txq->vxtxq_defer, EVCNT_TYPE_MISC,
1947 		    NULL, txq->vxtxq_name, "Handled queue in softint/workqueue");
1948 		evcnt_attach_dynamic(&txq->vxtxq_deferreq, EVCNT_TYPE_MISC,
1949 		    NULL, txq->vxtxq_name, "Requested in softint/workqueue");
1950 		evcnt_attach_dynamic(&txq->vxtxq_pcqdrop, EVCNT_TYPE_MISC,
1951 		    NULL, txq->vxtxq_name, "Dropped in pcq");
1952 		evcnt_attach_dynamic(&txq->vxtxq_transmitdef, EVCNT_TYPE_MISC,
1953 		    NULL, txq->vxtxq_name, "Deferred transmit");
1954 		evcnt_attach_dynamic(&txq->vxtxq_watchdogto, EVCNT_TYPE_MISC,
1955 		    NULL, txq->vxtxq_name, "Watchdog timeout");
1956 		evcnt_attach_dynamic(&txq->vxtxq_defragged, EVCNT_TYPE_MISC,
1957 		    NULL, txq->vxtxq_name, "m_defrag succeeded");
1958 		evcnt_attach_dynamic(&txq->vxtxq_defrag_failed, EVCNT_TYPE_MISC,
1959 		    NULL, txq->vxtxq_name, "m_defrag failed");
1960 	}
1961 
1962 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1963 		vmxq = &sc->vmx_queue[i];
1964 		rxq = &vmxq->vxq_rxqueue;
1965 		evcnt_attach_dynamic(&rxq->vxrxq_intr, EVCNT_TYPE_INTR,
1966 		    NULL, rxq->vxrxq_name, "Interrupt on queue");
1967 		evcnt_attach_dynamic(&rxq->vxrxq_defer, EVCNT_TYPE_MISC,
1968 		    NULL, rxq->vxrxq_name, "Handled queue in softint/workqueue");
1969 		evcnt_attach_dynamic(&rxq->vxrxq_deferreq, EVCNT_TYPE_MISC,
1970 		    NULL, rxq->vxrxq_name, "Requested in softint/workqueue");
1971 		evcnt_attach_dynamic(&rxq->vxrxq_mgetcl_failed, EVCNT_TYPE_MISC,
1972 		    NULL, rxq->vxrxq_name, "MCLGET failed");
1973 		evcnt_attach_dynamic(&rxq->vxrxq_mbuf_load_failed, EVCNT_TYPE_MISC,
1974 		    NULL, rxq->vxrxq_name, "bus_dmamap_load_mbuf failed");
1975 	}
1976 
1977 	evcnt_attach_dynamic(&sc->vmx_event_intr, EVCNT_TYPE_INTR,
1978 	    NULL, device_xname(sc->vmx_dev), "Interrupt for other events");
1979 	evcnt_attach_dynamic(&sc->vmx_event_link, EVCNT_TYPE_MISC,
1980 	    NULL, device_xname(sc->vmx_dev), "Link status event");
1981 	evcnt_attach_dynamic(&sc->vmx_event_txqerror, EVCNT_TYPE_MISC,
1982 	    NULL, device_xname(sc->vmx_dev), "Tx queue error event");
1983 	evcnt_attach_dynamic(&sc->vmx_event_rxqerror, EVCNT_TYPE_MISC,
1984 	    NULL, device_xname(sc->vmx_dev), "Rx queue error event");
1985 	evcnt_attach_dynamic(&sc->vmx_event_dic, EVCNT_TYPE_MISC,
1986 	    NULL, device_xname(sc->vmx_dev), "Device impl change event");
1987 	evcnt_attach_dynamic(&sc->vmx_event_debug, EVCNT_TYPE_MISC,
1988 	    NULL, device_xname(sc->vmx_dev), "Debug event");
1989 
1990 	return 0;
1991 }
1992 
1993 static void
1994 vmxnet3_teardown_stats(struct vmxnet3_softc *sc)
1995 {
1996 	struct vmxnet3_queue *vmxq;
1997 	struct vmxnet3_txqueue *txq;
1998 	struct vmxnet3_rxqueue *rxq;
1999 	int i;
2000 
2001 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2002 		vmxq = &sc->vmx_queue[i];
2003 		txq = &vmxq->vxq_txqueue;
2004 		evcnt_detach(&txq->vxtxq_intr);
2005 		evcnt_detach(&txq->vxtxq_defer);
2006 		evcnt_detach(&txq->vxtxq_deferreq);
2007 		evcnt_detach(&txq->vxtxq_pcqdrop);
2008 		evcnt_detach(&txq->vxtxq_transmitdef);
2009 		evcnt_detach(&txq->vxtxq_watchdogto);
2010 		evcnt_detach(&txq->vxtxq_defragged);
2011 		evcnt_detach(&txq->vxtxq_defrag_failed);
2012 	}
2013 
2014 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2015 		vmxq = &sc->vmx_queue[i];
2016 		rxq = &vmxq->vxq_rxqueue;
2017 		evcnt_detach(&rxq->vxrxq_intr);
2018 		evcnt_detach(&rxq->vxrxq_defer);
2019 		evcnt_detach(&rxq->vxrxq_deferreq);
2020 		evcnt_detach(&rxq->vxrxq_mgetcl_failed);
2021 		evcnt_detach(&rxq->vxrxq_mbuf_load_failed);
2022 	}
2023 
2024 	evcnt_detach(&sc->vmx_event_intr);
2025 	evcnt_detach(&sc->vmx_event_link);
2026 	evcnt_detach(&sc->vmx_event_txqerror);
2027 	evcnt_detach(&sc->vmx_event_rxqerror);
2028 	evcnt_detach(&sc->vmx_event_dic);
2029 	evcnt_detach(&sc->vmx_event_debug);
2030 }
2031 
2032 static void
2033 vmxnet3_evintr(struct vmxnet3_softc *sc)
2034 {
2035 	device_t dev;
2036 	struct vmxnet3_txq_shared *ts;
2037 	struct vmxnet3_rxq_shared *rs;
2038 	uint32_t event;
2039 	int reset;
2040 
2041 	dev = sc->vmx_dev;
2042 	reset = 0;
2043 
2044 	VMXNET3_CORE_LOCK(sc);
2045 
2046 	/* Clear events. */
2047 	event = sc->vmx_ds->event;
2048 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
2049 
2050 	if (event & VMXNET3_EVENT_LINK) {
2051 		sc->vmx_event_link.ev_count++;
2052 		vmxnet3_if_link_status(sc);
2053 		if (sc->vmx_link_active != 0)
2054 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2055 	}
2056 
2057 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
2058 		if (event & VMXNET3_EVENT_TQERROR)
2059 			sc->vmx_event_txqerror.ev_count++;
2060 		if (event & VMXNET3_EVENT_RQERROR)
2061 			sc->vmx_event_rxqerror.ev_count++;
2062 
2063 		reset = 1;
2064 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
2065 		ts = sc->vmx_queue[0].vxq_txqueue.vxtxq_ts;
2066 		if (ts->stopped != 0)
2067 			device_printf(dev, "Tx queue error %#x\n", ts->error);
2068 		rs = sc->vmx_queue[0].vxq_rxqueue.vxrxq_rs;
2069 		if (rs->stopped != 0)
2070 			device_printf(dev, "Rx queue error %#x\n", rs->error);
2071 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
2072 	}
2073 
2074 	if (event & VMXNET3_EVENT_DIC) {
2075 		sc->vmx_event_dic.ev_count++;
2076 		device_printf(dev, "device implementation change event\n");
2077 	}
2078 	if (event & VMXNET3_EVENT_DEBUG) {
2079 		sc->vmx_event_debug.ev_count++;
2080 		device_printf(dev, "debug event\n");
2081 	}
2082 
2083 	if (reset != 0)
2084 		vmxnet3_init_locked(sc);
2085 
2086 	VMXNET3_CORE_UNLOCK(sc);
2087 }
2088 
2089 static bool
2090 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq, u_int limit)
2091 {
2092 	struct vmxnet3_softc *sc;
2093 	struct vmxnet3_txring *txr;
2094 	struct vmxnet3_comp_ring *txc;
2095 	struct vmxnet3_txcompdesc *txcd;
2096 	struct vmxnet3_txbuf *txb;
2097 	struct ifnet *ifp;
2098 	struct mbuf *m;
2099 	u_int sop;
2100 	bool more = false;
2101 
2102 	sc = txq->vxtxq_sc;
2103 	txr = &txq->vxtxq_cmd_ring;
2104 	txc = &txq->vxtxq_comp_ring;
2105 	ifp = &sc->vmx_ethercom.ec_if;
2106 
2107 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2108 
2109 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2110 	for (;;) {
2111 		if (limit-- == 0) {
2112 			more = true;
2113 			break;
2114 		}
2115 
2116 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
2117 		if (txcd->gen != txc->vxcr_gen)
2118 			break;
2119 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2120 
2121 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
2122 			txc->vxcr_next = 0;
2123 			txc->vxcr_gen ^= 1;
2124 		}
2125 
2126 		sop = txr->vxtxr_next;
2127 		txb = &txr->vxtxr_txbuf[sop];
2128 
2129 		if ((m = txb->vtxb_m) != NULL) {
2130 			bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2131 			    0, txb->vtxb_dmamap->dm_mapsize,
2132 			    BUS_DMASYNC_POSTWRITE);
2133 			bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2134 
2135 			if_statinc_ref(nsr, if_opackets);
2136 			if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
2137 			if (m->m_flags & M_MCAST)
2138 				if_statinc_ref(nsr, if_omcasts);
2139 
2140 			m_freem(m);
2141 			txb->vtxb_m = NULL;
2142 		}
2143 
2144 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
2145 	}
2146 	IF_STAT_PUTREF(ifp);
2147 
2148 	if (txr->vxtxr_head == txr->vxtxr_next)
2149 		txq->vxtxq_watchdog = 0;
2150 
2151 	return more;
2152 }
2153 
2154 static int
2155 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
2156     struct vmxnet3_rxring *rxr)
2157 {
2158 	struct mbuf *m;
2159 	struct vmxnet3_rxdesc *rxd;
2160 	struct vmxnet3_rxbuf *rxb;
2161 	bus_dma_tag_t tag;
2162 	bus_dmamap_t dmap;
2163 	int idx, btype, error;
2164 
2165 	tag = sc->vmx_dmat;
2166 	dmap = rxr->vxrxr_spare_dmap;
2167 	idx = rxr->vxrxr_fill;
2168 	rxd = &rxr->vxrxr_rxd[idx];
2169 	rxb = &rxr->vxrxr_rxbuf[idx];
2170 
2171 	/* Don't allocate buffers for ring 2 for now. */
2172 	if (rxr->vxrxr_rid != 0)
2173 		return -1;
2174 	btype = VMXNET3_BTYPE_HEAD;
2175 
2176 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2177 	if (m == NULL)
2178 		return (ENOBUFS);
2179 
2180 	MCLGET(m, M_DONTWAIT);
2181 	if ((m->m_flags & M_EXT) == 0) {
2182 		rxq->vxrxq_mgetcl_failed.ev_count++;
2183 		m_freem(m);
2184 		return (ENOBUFS);
2185 	}
2186 
2187 	m->m_pkthdr.len = m->m_len = JUMBO_LEN;
2188 	m_adj(m, ETHER_ALIGN);
2189 
2190 	error = bus_dmamap_load_mbuf(sc->vmx_dmat, dmap, m, BUS_DMA_NOWAIT);
2191 	if (error) {
2192 		m_freem(m);
2193 		rxq->vxrxq_mbuf_load_failed.ev_count++;
2194 		return (error);
2195 	}
2196 
2197 	if (rxb->vrxb_m != NULL) {
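	/*
	 * The replacement mbuf was loaded into the spare DMA map above, so
	 * the previous buffer (if any) can be torn down and the two maps
	 * swapped below.
	 */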
2198 		bus_dmamap_sync(tag, rxb->vrxb_dmamap,
2199 		    0, rxb->vrxb_dmamap->dm_mapsize,
2200 		    BUS_DMASYNC_POSTREAD);
2201 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2202 	}
2203 
2204 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2205 	rxb->vrxb_dmamap = dmap;
2206 	rxb->vrxb_m = m;
2207 
2208 	rxd->addr = DMAADDR(dmap);
2209 	rxd->len = m->m_pkthdr.len;
2210 	rxd->btype = btype;
2211 	rxd->gen = rxr->vxrxr_gen;
2212 
2213 	vmxnet3_rxr_increment_fill(rxr);
2214 	return (0);
2215 }
2216 
2217 static void
2218 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2219     struct vmxnet3_rxring *rxr, int idx)
2220 {
2221 	struct vmxnet3_rxdesc *rxd;
2222 
2223 	rxd = &rxr->vxrxr_rxd[idx];
2224 	rxd->gen = rxr->vxrxr_gen;
2225 	vmxnet3_rxr_increment_fill(rxr);
2226 }
2227 
2228 static void
2229 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2230 {
2231 	struct vmxnet3_softc *sc;
2232 	struct vmxnet3_rxring *rxr;
2233 	struct vmxnet3_comp_ring *rxc;
2234 	struct vmxnet3_rxcompdesc *rxcd;
2235 	int idx, eof;
2236 
2237 	sc = rxq->vxrxq_sc;
2238 	rxc = &rxq->vxrxq_comp_ring;
2239 
2240 	do {
2241 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2242 		if (rxcd->gen != rxc->vxcr_gen)
2243 			break;		/* Not expected. */
2244 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2245 
2246 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2247 			rxc->vxcr_next = 0;
2248 			rxc->vxcr_gen ^= 1;
2249 		}
2250 
2251 		idx = rxcd->rxd_idx;
2252 		eof = rxcd->eop;
2253 		if (rxcd->qid < sc->vmx_nrxqueues)
2254 			rxr = &rxq->vxrxq_cmd_ring[0];
2255 		else
2256 			rxr = &rxq->vxrxq_cmd_ring[1];
2257 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2258 	} while (!eof);
2259 }
2260 
2261 static void
2262 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2263 {
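	/*
	 * Translate the checksum status bits in the Rx completion
	 * descriptor into the corresponding mbuf M_CSUM flags so the stack
	 * can skip software verification.
	 */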
2264 	if (rxcd->no_csum)
2265 		return;
2266 
2267 	if (rxcd->ipv4) {
2268 		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
2269 		if (rxcd->ipcsum_ok == 0)
2270 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
2271 	}
2272 
2273 	if (rxcd->fragment)
2274 		return;
2275 
2276 	if (rxcd->tcp) {
2277 		m->m_pkthdr.csum_flags |=
2278 		    rxcd->ipv4 ? M_CSUM_TCPv4 : M_CSUM_TCPv6;
2279 		if ((rxcd->csum_ok) == 0)
2280 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2281 	}
2282 
2283 	if (rxcd->udp) {
2284 		m->m_pkthdr.csum_flags |=
2285 		    rxcd->ipv4 ? M_CSUM_UDPv4 : M_CSUM_UDPv6;
2286 		if ((rxcd->csum_ok) == 0)
2287 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2288 	}
2289 }
2290 
2291 static void
2292 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2293     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2294 {
2295 	struct vmxnet3_softc *sc;
2296 	struct ifnet *ifp;
2297 
2298 	sc = rxq->vxrxq_sc;
2299 	ifp = &sc->vmx_ethercom.ec_if;
2300 
2301 	if (rxcd->error) {
2302 		if_statinc(ifp, if_ierrors);
2303 		m_freem(m);
2304 		return;
2305 	}
2306 
2307 	if (!rxcd->no_csum)
2308 		vmxnet3_rx_csum(rxcd, m);
2309 	if (rxcd->vlan)
2310 		vlan_set_tag(m, rxcd->vtag);
2311 
2312 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2313 	if_statinc_ref(nsr, if_ipackets);
2314 	if_statadd_ref(nsr, if_ibytes, m->m_pkthdr.len);
2315 	IF_STAT_PUTREF(ifp);
2316 
2317 	if_percpuq_enqueue(ifp->if_percpuq, m);
2318 }
2319 
2320 static bool
2321 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
2322 {
2323 	struct vmxnet3_softc *sc;
2324 	struct ifnet *ifp;
2325 	struct vmxnet3_rxring *rxr;
2326 	struct vmxnet3_comp_ring *rxc;
2327 	struct vmxnet3_rxdesc *rxd __diagused;
2328 	struct vmxnet3_rxcompdesc *rxcd;
2329 	struct mbuf *m, *m_head, *m_tail;
2330 	u_int idx, length;
2331 	bool more = false;
2332 
2333 	sc = rxq->vxrxq_sc;
2334 	ifp = &sc->vmx_ethercom.ec_if;
2335 	rxc = &rxq->vxrxq_comp_ring;
2336 
2337 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2338 
2339 	if ((ifp->if_flags & IFF_RUNNING) == 0)
2340 		return more;
2341 
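	/*
	 * A frame may span multiple calls if the completion ring ran dry
	 * mid-frame; pick up any partially assembled mbuf chain that was
	 * stashed in the queue on the previous pass.
	 */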
2342 	m_head = rxq->vxrxq_mhead;
2343 	rxq->vxrxq_mhead = NULL;
2344 	m_tail = rxq->vxrxq_mtail;
2345 	rxq->vxrxq_mtail = NULL;
2346 	KASSERT(m_head == NULL || m_tail != NULL);
2347 
2348 	for (;;) {
2349 		if (limit-- == 0) {
2350 			more = true;
2351 			break;
2352 		}
2353 
2354 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2355 		if (rxcd->gen != rxc->vxcr_gen) {
2356 			rxq->vxrxq_mhead = m_head;
2357 			rxq->vxrxq_mtail = m_tail;
2358 			break;
2359 		}
2360 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2361 
2362 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2363 			rxc->vxcr_next = 0;
2364 			rxc->vxcr_gen ^= 1;
2365 		}
2366 
2367 		idx = rxcd->rxd_idx;
2368 		length = rxcd->len;
2369 		if (rxcd->qid < sc->vmx_nrxqueues)
2370 			rxr = &rxq->vxrxq_cmd_ring[0];
2371 		else
2372 			rxr = &rxq->vxrxq_cmd_ring[1];
2373 		rxd = &rxr->vxrxr_rxd[idx];
2374 
2375 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2376 		KASSERT(m != NULL);
2377 
2378 		/*
2379 		 * The host may skip descriptors. We detect this when this
2380 		 * descriptor does not match the previous fill index. Catch
2381 		 * up with the host now.
2382 		 */
2383 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2384 			while (rxr->vxrxr_fill != idx) {
2385 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2386 				    rxr->vxrxr_gen;
2387 				vmxnet3_rxr_increment_fill(rxr);
2388 			}
2389 		}
2390 
2391 		if (rxcd->sop) {
2392 			/* start of frame w/o head buffer */
2393 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD);
2394 			/* start of frame not in ring 0 */
2395 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0]);
2396 			/* duplicate start of frame? */
2397 			KASSERT(m_head == NULL);
2398 
2399 			if (length == 0) {
2400 				/* Just ignore this descriptor. */
2401 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2402 				goto nextp;
2403 			}
2404 
2405 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2406 				if_statinc(ifp, if_iqdrops);
2407 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2408 				if (!rxcd->eop)
2409 					vmxnet3_rxq_discard_chain(rxq);
2410 				goto nextp;
2411 			}
2412 
2413 			m_set_rcvif(m, ifp);
2414 			m->m_pkthdr.len = m->m_len = length;
2415 			m->m_pkthdr.csum_flags = 0;
2416 			m_head = m_tail = m;
2417 
2418 		} else {
2419 			/* non start of frame w/o body buffer */
2420 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY);
2421 			/* frame not started? */
2422 			KASSERT(m_head != NULL);
2423 
2424 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2425 				if_statinc(ifp, if_iqdrops);
2426 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2427 				if (!rxcd->eop)
2428 					vmxnet3_rxq_discard_chain(rxq);
2429 				m_freem(m_head);
2430 				m_head = m_tail = NULL;
2431 				goto nextp;
2432 			}
2433 
2434 			m->m_len = length;
2435 			m_head->m_pkthdr.len += length;
2436 			m_tail->m_next = m;
2437 			m_tail = m;
2438 		}
2439 
2440 		if (rxcd->eop) {
2441 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2442 			m_head = m_tail = NULL;
2443 
2444 			/* Must recheck after dropping the Rx lock. */
2445 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2446 				break;
2447 		}
2448 
2449 nextp:
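		/*
		 * When the host requests it via update_rxhead, mirror the
		 * index of the next descriptor back to the matching BAR0
		 * ring-head register.
		 */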
2450 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2451 			int qid = rxcd->qid;
2452 			bus_size_t r;
2453 
2454 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2455 			if (qid >= sc->vmx_nrxqueues) {
2456 				qid -= sc->vmx_nrxqueues;
2457 				r = VMXNET3_BAR0_RXH2(qid);
2458 			} else
2459 				r = VMXNET3_BAR0_RXH1(qid);
2460 			vmxnet3_write_bar0(sc, r, idx);
2461 		}
2462 	}
2463 
2464 	return more;
2465 }
2466 
2467 static inline void
2468 vmxnet3_sched_handle_queue(struct vmxnet3_softc *sc, struct vmxnet3_queue *vmxq)
2469 {
2470 
2471 	if (vmxq->vxq_workqueue) {
2472 		/*
2473 		 * When this function is called, "vmxq" is owned by one CPU.
2474 		 * so, atomic operation is not required here.
2475 		 */
2476 		if (!vmxq->vxq_wq_enqueued) {
2477 			vmxq->vxq_wq_enqueued = true;
2478 			workqueue_enqueue(sc->vmx_queue_wq,
2479 			    &vmxq->vxq_wq_cookie, curcpu());
2480 		}
2481 	} else {
2482 		softint_schedule(vmxq->vxq_si);
2483 	}
2484 }
2485 
2486 static int
2487 vmxnet3_legacy_intr(void *xsc)
2488 {
2489 	struct vmxnet3_softc *sc;
2490 	struct vmxnet3_queue *vmxq;
2491 	struct vmxnet3_txqueue *txq;
2492 	struct vmxnet3_rxqueue *rxq;
2493 	u_int txlimit, rxlimit;
2494 	bool txmore, rxmore;
2495 
2496 	sc = xsc;
2497 	vmxq = &sc->vmx_queue[0];
2498 	txq = &vmxq->vxq_txqueue;
2499 	rxq = &vmxq->vxq_rxqueue;
2500 	txlimit = sc->vmx_tx_intr_process_limit;
2501 	rxlimit = sc->vmx_rx_intr_process_limit;
2502 
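	/*
	 * With a true legacy (INTx) interrupt the event may not be ours;
	 * check the interrupt status register before claiming it.
	 */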
2503 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2504 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2505 			return (0);
2506 	}
2507 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2508 		vmxnet3_disable_all_intrs(sc);
2509 
2510 	if (sc->vmx_ds->event != 0)
2511 		vmxnet3_evintr(sc);
2512 
2513 	VMXNET3_TXQ_LOCK(txq);
2514 	txmore = vmxnet3_txq_eof(txq, txlimit);
2515 	VMXNET3_TXQ_UNLOCK(txq);
2516 
2517 	VMXNET3_RXQ_LOCK(rxq);
2518 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2519 	VMXNET3_RXQ_UNLOCK(rxq);
2520 
2521 	if (txmore || rxmore)
2522 		vmxnet3_sched_handle_queue(sc, vmxq);
2523 	else {
2524 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2525 		vmxnet3_enable_all_intrs(sc);
2526 	}
2527 
2528 	return (1);
2529 }
2530 
2531 static int
2532 vmxnet3_txrxq_intr(void *xvmxq)
2533 {
2534 	struct vmxnet3_softc *sc;
2535 	struct vmxnet3_queue *vmxq;
2536 	struct vmxnet3_txqueue *txq;
2537 	struct vmxnet3_rxqueue *rxq;
2538 	u_int txlimit, rxlimit;
2539 	bool txmore, rxmore;
2540 
2541 	vmxq = xvmxq;
2542 	txq = &vmxq->vxq_txqueue;
2543 	rxq = &vmxq->vxq_rxqueue;
2544 	sc = txq->vxtxq_sc;
2545 	txlimit = sc->vmx_tx_intr_process_limit;
2546 	rxlimit = sc->vmx_rx_intr_process_limit;
2547 	vmxq->vxq_workqueue = sc->vmx_txrx_workqueue;
2548 
2549 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2550 		vmxnet3_disable_intr(sc, vmxq->vxq_intr_idx);
2551 
2552 	VMXNET3_TXQ_LOCK(txq);
2553 	txq->vxtxq_intr.ev_count++;
2554 	txmore = vmxnet3_txq_eof(txq, txlimit);
2555 	VMXNET3_TXQ_UNLOCK(txq);
2556 
2557 	VMXNET3_RXQ_LOCK(rxq);
2558 	rxq->vxrxq_intr.ev_count++;
2559 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2560 	VMXNET3_RXQ_UNLOCK(rxq);
2561 
2562 	if (txmore || rxmore)
2563 		vmxnet3_sched_handle_queue(sc, vmxq);
2564 	else {
2565 		/* for ALTQ */
2566 		if (vmxq->vxq_id == 0)
2567 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2568 		softint_schedule(txq->vxtxq_si);
2569 
2570 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2571 	}
2572 
2573 	return (1);
2574 }
2575 
2576 static void
2577 vmxnet3_handle_queue(void *xvmxq)
2578 {
2579 	struct vmxnet3_softc *sc;
2580 	struct vmxnet3_queue *vmxq;
2581 	struct vmxnet3_txqueue *txq;
2582 	struct vmxnet3_rxqueue *rxq;
2583 	u_int txlimit, rxlimit;
2584 	bool txmore, rxmore;
2585 
2586 	vmxq = xvmxq;
2587 	txq = &vmxq->vxq_txqueue;
2588 	rxq = &vmxq->vxq_rxqueue;
2589 	sc = txq->vxtxq_sc;
2590 	txlimit = sc->vmx_tx_process_limit;
2591 	rxlimit = sc->vmx_rx_process_limit;
2592 
2593 	VMXNET3_TXQ_LOCK(txq);
2594 	txq->vxtxq_defer.ev_count++;
2595 	txmore = vmxnet3_txq_eof(txq, txlimit);
2596 	if (txmore)
2597 		txq->vxtxq_deferreq.ev_count++;
2598 	/* for ALTQ */
2599 	if (vmxq->vxq_id == 0)
2600 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2601 	softint_schedule(txq->vxtxq_si);
2602 	VMXNET3_TXQ_UNLOCK(txq);
2603 
2604 	VMXNET3_RXQ_LOCK(rxq);
2605 	rxq->vxrxq_defer.ev_count++;
2606 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2607 	if (rxmore)
2608 		rxq->vxrxq_deferreq.ev_count++;
2609 	VMXNET3_RXQ_UNLOCK(rxq);
2610 
2611 	if (txmore || rxmore)
2612 		vmxnet3_sched_handle_queue(sc, vmxq);
2613 	else
2614 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2615 }
2616 
2617 static void
2618 vmxnet3_handle_queue_work(struct work *wk, void *context)
2619 {
2620 	struct vmxnet3_queue *vmxq;
2621 
2622 	vmxq = container_of(wk, struct vmxnet3_queue, vxq_wq_cookie);
2623 	vmxq->vxq_wq_enqueued = false;
2624 	vmxnet3_handle_queue(vmxq);
2625 }
2626 
2627 static int
2628 vmxnet3_event_intr(void *xsc)
2629 {
2630 	struct vmxnet3_softc *sc;
2631 
2632 	sc = xsc;
2633 
2634 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2635 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2636 
2637 	sc->vmx_event_intr.ev_count++;
2638 
2639 	if (sc->vmx_ds->event != 0)
2640 		vmxnet3_evintr(sc);
2641 
2642 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2643 
2644 	return (1);
2645 }
2646 
2647 static void
2648 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2649 {
2650 	struct vmxnet3_txring *txr;
2651 	struct vmxnet3_txbuf *txb;
2652 	u_int i;
2653 
2654 	txr = &txq->vxtxq_cmd_ring;
2655 
2656 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2657 		txb = &txr->vxtxr_txbuf[i];
2658 
2659 		if (txb->vtxb_m == NULL)
2660 			continue;
2661 
2662 		bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2663 		    0, txb->vtxb_dmamap->dm_mapsize,
2664 		    BUS_DMASYNC_POSTWRITE);
2665 		bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2666 		m_freem(txb->vtxb_m);
2667 		txb->vtxb_m = NULL;
2668 	}
2669 }
2670 
2671 static void
2672 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2673 {
2674 	struct vmxnet3_rxring *rxr;
2675 	struct vmxnet3_rxbuf *rxb;
2676 	u_int i, j;
2677 
2678 	if (rxq->vxrxq_mhead != NULL) {
2679 		m_freem(rxq->vxrxq_mhead);
2680 		rxq->vxrxq_mhead = NULL;
2681 		rxq->vxrxq_mtail = NULL;
2682 	}
2683 
2684 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2685 		rxr = &rxq->vxrxq_cmd_ring[i];
2686 
2687 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2688 			rxb = &rxr->vxrxr_rxbuf[j];
2689 
2690 			if (rxb->vrxb_m == NULL)
2691 				continue;
2692 
2693 			bus_dmamap_sync(sc->vmx_dmat, rxb->vrxb_dmamap,
2694 			    0, rxb->vrxb_dmamap->dm_mapsize,
2695 			    BUS_DMASYNC_POSTREAD);
2696 			bus_dmamap_unload(sc->vmx_dmat, rxb->vrxb_dmamap);
2697 			m_freem(rxb->vrxb_m);
2698 			rxb->vrxb_m = NULL;
2699 		}
2700 	}
2701 }
2702 
2703 static void
2704 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2705 {
2706 	struct vmxnet3_rxqueue *rxq;
2707 	struct vmxnet3_txqueue *txq;
2708 	struct vmxnet3_queue *vmxq;
2709 	int i;
2710 
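	/*
	 * Take and drop each queue lock in turn: any interrupt or softint
	 * handler still running on a queue must release its lock before we
	 * can acquire it, so this acts as a rendezvous.  Afterwards wait
	 * for queue work that is already on the workqueue to finish.
	 */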
2711 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2712 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
2713 		VMXNET3_RXQ_LOCK(rxq);
2714 		VMXNET3_RXQ_UNLOCK(rxq);
2715 	}
2716 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2717 		txq = &sc->vmx_queue[i].vxq_txqueue;
2718 		VMXNET3_TXQ_LOCK(txq);
2719 		VMXNET3_TXQ_UNLOCK(txq);
2720 	}
2721 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2722 		vmxq = &sc->vmx_queue[i];
2723 		workqueue_wait(sc->vmx_queue_wq, &vmxq->vxq_wq_cookie);
2724 	}
2725 }
2726 
2727 static void
2728 vmxnet3_stop_locked(struct vmxnet3_softc *sc)
2729 {
2730 	struct ifnet *ifp;
2731 	int q;
2732 
2733 	ifp = &sc->vmx_ethercom.ec_if;
2734 	VMXNET3_CORE_LOCK_ASSERT(sc);
2735 
2736 	ifp->if_flags &= ~IFF_RUNNING;
2737 	sc->vmx_link_active = 0;
2738 	callout_stop(&sc->vmx_tick);
2739 
2740 	/* Disable interrupts. */
2741 	vmxnet3_disable_all_intrs(sc);
2742 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2743 
2744 	vmxnet3_stop_rendezvous(sc);
2745 
2746 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2747 		vmxnet3_txstop(sc, &sc->vmx_queue[q].vxq_txqueue);
2748 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2749 		vmxnet3_rxstop(sc, &sc->vmx_queue[q].vxq_rxqueue);
2750 
2751 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2752 }
2753 
2754 static void
2755 vmxnet3_stop(struct ifnet *ifp, int disable)
2756 {
2757 	struct vmxnet3_softc *sc = ifp->if_softc;
2758 
2759 	VMXNET3_CORE_LOCK(sc);
2760 	vmxnet3_stop_locked(sc);
2761 	VMXNET3_CORE_UNLOCK(sc);
2762 }
2763 
2764 static void
2765 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2766 {
2767 	struct vmxnet3_txring *txr;
2768 	struct vmxnet3_comp_ring *txc;
2769 
2770 	txr = &txq->vxtxq_cmd_ring;
2771 	txr->vxtxr_head = 0;
2772 	txr->vxtxr_next = 0;
2773 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2774 	memset(txr->vxtxr_txd, 0,
2775 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2776 
2777 	txc = &txq->vxtxq_comp_ring;
2778 	txc->vxcr_next = 0;
2779 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2780 	memset(txc->vxcr_u.txcd, 0,
2781 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2782 }
2783 
2784 static int
2785 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2786 {
2787 	struct vmxnet3_rxring *rxr;
2788 	struct vmxnet3_comp_ring *rxc;
2789 	u_int i, populate, idx;
2790 	int error;
2791 
2792 	/* LRO and jumbo frames are not supported yet */
2793 	populate = 1;
2794 
2795 	for (i = 0; i < populate; i++) {
2796 		rxr = &rxq->vxrxq_cmd_ring[i];
2797 		rxr->vxrxr_fill = 0;
2798 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2799 		memset(rxr->vxrxr_rxd, 0,
2800 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2801 
2802 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2803 			error = vmxnet3_newbuf(sc, rxq, rxr);
2804 			if (error)
2805 				return (error);
2806 		}
2807 	}
2808 
2809 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2810 		rxr = &rxq->vxrxq_cmd_ring[i];
2811 		rxr->vxrxr_fill = 0;
2812 		rxr->vxrxr_gen = 0;
2813 		memset(rxr->vxrxr_rxd, 0,
2814 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2815 	}
2816 
2817 	rxc = &rxq->vxrxq_comp_ring;
2818 	rxc->vxcr_next = 0;
2819 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2820 	memset(rxc->vxcr_u.rxcd, 0,
2821 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2822 
2823 	return (0);
2824 }
2825 
2826 static int
2827 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2828 {
2829 	device_t dev;
2830 	int q, error;
2831 	dev = sc->vmx_dev;
2832 
2833 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2834 		vmxnet3_txinit(sc, &sc->vmx_queue[q].vxq_txqueue);
2835 
2836 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2837 		error = vmxnet3_rxinit(sc, &sc->vmx_queue[q].vxq_rxqueue);
2838 		if (error) {
2839 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2840 			return (error);
2841 		}
2842 	}
2843 
2844 	return (0);
2845 }
2846 
2847 static int
2848 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2849 {
2850 	int q;
2851 
2852 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2853 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2854 		return (1);
2855 	}
2856 
2857 	/* Reset the Rx queue heads. */
2858 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2859 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2860 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2861 	}
2862 
2863 	return (0);
2864 }
2865 
2866 static void
2867 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2868 {
2869 
2870 	vmxnet3_set_rxfilter(sc);
2871 
2872 	memset(sc->vmx_ds->vlan_filter, 0, sizeof(sc->vmx_ds->vlan_filter));
2873 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2874 }
2875 
2876 static int
2877 vmxnet3_reinit(struct vmxnet3_softc *sc)
2878 {
2879 
2880 	vmxnet3_set_lladdr(sc);
2881 	vmxnet3_reinit_shared_data(sc);
2882 
2883 	if (vmxnet3_reinit_queues(sc) != 0)
2884 		return (ENXIO);
2885 
2886 	if (vmxnet3_enable_device(sc) != 0)
2887 		return (ENXIO);
2888 
2889 	vmxnet3_reinit_rxfilters(sc);
2890 
2891 	return (0);
2892 }
2893 
2894 static int
2895 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2896 {
2897 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
2898 	int error;
2899 
2900 	vmxnet3_stop_locked(sc);
2901 
2902 	error = vmxnet3_reinit(sc);
2903 	if (error) {
2904 		vmxnet3_stop_locked(sc);
2905 		return (error);
2906 	}
2907 
2908 	ifp->if_flags |= IFF_RUNNING;
2909 	vmxnet3_if_link_status(sc);
2910 
2911 	vmxnet3_enable_all_intrs(sc);
2912 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2913 
2914 	return (0);
2915 }
2916 
2917 static int
2918 vmxnet3_init(struct ifnet *ifp)
2919 {
2920 	struct vmxnet3_softc *sc = ifp->if_softc;
2921 	int error;
2922 
2923 	VMXNET3_CORE_LOCK(sc);
2924 	error = vmxnet3_init_locked(sc);
2925 	VMXNET3_CORE_UNLOCK(sc);
2926 
2927 	return (error);
2928 }
2929 
2930 static int
2931 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2932     int *start, int *csum_start)
2933 {
2934 	struct ether_header *eh;
2935 	struct mbuf *mp;
2936 	int offset, csum_off, iphl, offp;
2937 	bool v4;
2938 
2939 	eh = mtod(m, struct ether_header *);
2940 	switch (htons(eh->ether_type)) {
2941 	case ETHERTYPE_IP:
2942 	case ETHERTYPE_IPV6:
2943 		offset = ETHER_HDR_LEN;
2944 		break;
2945 	case ETHERTYPE_VLAN:
2946 		offset = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2947 		break;
2948 	default:
2949 		m_freem(m);
2950 		return (EINVAL);
2951 	}
2952 
2953 	if ((m->m_pkthdr.csum_flags &
2954 	    (M_CSUM_TSOv4 | M_CSUM_UDPv4 | M_CSUM_TCPv4)) != 0) {
2955 		iphl = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
2956 		v4 = true;
2957 	} else {
2958 		iphl = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
2959 		v4 = false;
2960 	}
2961 	*start = offset + iphl;
2962 
2963 	if (m->m_pkthdr.csum_flags &
2964 	    (M_CSUM_TCPv4 | M_CSUM_TCPv6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2965 		csum_off = offsetof(struct tcphdr, th_sum);
2966 	} else {
2967 		csum_off = offsetof(struct udphdr, uh_sum);
2968 	}
2969 
2970 	*csum_start = *start + csum_off;
2971 	mp = m_pulldown(m, 0, *csum_start + 2, &offp);
2972 	if (!mp) {
2973 		/* m is already freed */
2974 		return ENOBUFS;
2975 	}
2976 
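	/*
	 * For TSO the TCP checksum field must be primed with the
	 * pseudo-header checksum, so compute it here over the addresses
	 * and protocol only.
	 */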
2977 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2978 		struct tcphdr *tcp;
2979 
2980 		txq->vxtxq_stats.vmtxs_tso++;
2981 		tcp = (void *)(mtod(mp, char *) + offp + *start);
2982 
2983 		if (v4) {
2984 			struct ip *ip;
2985 
2986 			ip = (void *)(mtod(mp, char *) + offp + offset);
2987 			tcp->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
2988 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2989 		} else {
2990 			struct ip6_hdr *ip6;
2991 
2992 			ip6 = (void *)(mtod(mp, char *) + offp + offset);
2993 			tcp->th_sum = in6_cksum_phdr(&ip6->ip6_src,
2994 			    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
2995 		}
2996 
2997 		/*
2998 		 * For TSO, the size of the protocol header is also
2999 		 * included in the descriptor header size.
3000 		 */
3001 		*start += (tcp->th_off << 2);
3002 	} else
3003 		txq->vxtxq_stats.vmtxs_csum++;
3004 
3005 	return (0);
3006 }
3007 
3008 static int
3009 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
3010     bus_dmamap_t dmap)
3011 {
3012 	struct mbuf *m;
3013 	bus_dma_tag_t tag;
3014 	int error;
3015 
3016 	m = *m0;
3017 	tag = txq->vxtxq_sc->vmx_dmat;
3018 
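	/*
	 * Try to load the mbuf chain as-is; only EFBIG (too many DMA
	 * segments) is worth retrying after m_defrag() compacts the chain.
	 */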
3019 	error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3020 	if (error == 0 || error != EFBIG)
3021 		return (error);
3022 
3023 	m = m_defrag(m, M_NOWAIT);
3024 	if (m != NULL) {
3025 		*m0 = m;
3026 		error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3027 	} else
3028 		error = ENOBUFS;
3029 
3030 	if (error) {
3031 		m_freem(*m0);
3032 		*m0 = NULL;
3033 		txq->vxtxq_defrag_failed.ev_count++;
3034 	} else
3035 		txq->vxtxq_defragged.ev_count++;
3036 
3037 	return (error);
3038 }
3039 
3040 static void
3041 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
3042 {
3043 
3044 	bus_dmamap_unload(txq->vxtxq_sc->vmx_dmat, dmap);
3045 }
3046 
3047 static int
3048 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
3049 {
3050 	struct vmxnet3_softc *sc;
3051 	struct vmxnet3_txring *txr;
3052 	struct vmxnet3_txdesc *txd, *sop;
3053 	struct mbuf *m;
3054 	bus_dmamap_t dmap;
3055 	bus_dma_segment_t *segs;
3056 	int i, gen, start, csum_start, nsegs, error;
3057 
3058 	sc = txq->vxtxq_sc;
3059 	start = 0;
3060 	txd = NULL;
3061 	txr = &txq->vxtxq_cmd_ring;
3062 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
3063 	csum_start = 0; /* quiet a GCC uninitialized-variable warning */
3064 
3065 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap);
3066 	if (error)
3067 		return (error);
3068 
3069 	nsegs = dmap->dm_nsegs;
3070 	segs = dmap->dm_segs;
3071 
3072 	m = *m0;
3073 	KASSERT(m->m_flags & M_PKTHDR);
3074 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS);
3075 
3076 	if (vmxnet3_txring_avail(txr) < nsegs) {
3077 		txq->vxtxq_stats.vmtxs_full++;
3078 		vmxnet3_txq_unload_mbuf(txq, dmap);
3079 		return (ENOSPC);
3080 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
3081 		error = vmxnet3_txq_offload_ctx(txq, m, &start, &csum_start);
3082 		if (error) {
3083 			/* m is already freed */
3084 			txq->vxtxq_stats.vmtxs_offload_failed++;
3085 			vmxnet3_txq_unload_mbuf(txq, dmap);
3086 			*m0 = NULL;
3087 			return (error);
3088 		}
3089 	}
3090 
3091 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
3092 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
3093 	gen = txr->vxtxr_gen ^ 1;	/* SOP stays owned by the CPU for now */
3094 
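	/*
	 * Write all descriptors before handing the packet to the device:
	 * only the SOP descriptor carries the inverted generation bit, and
	 * flipping it after the write barrier below releases the whole
	 * chain to the device at once.
	 */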
3095 	for (i = 0; i < nsegs; i++) {
3096 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
3097 
3098 		txd->addr = segs[i].ds_addr;
3099 		txd->len = segs[i].ds_len;
3100 		txd->gen = gen;
3101 		txd->dtype = 0;
3102 		txd->offload_mode = VMXNET3_OM_NONE;
3103 		txd->offload_pos = 0;
3104 		txd->hlen = 0;
3105 		txd->eop = 0;
3106 		txd->compreq = 0;
3107 		txd->vtag_mode = 0;
3108 		txd->vtag = 0;
3109 
3110 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
3111 			txr->vxtxr_head = 0;
3112 			txr->vxtxr_gen ^= 1;
3113 		}
3114 		gen = txr->vxtxr_gen;
3115 	}
3116 	txd->eop = 1;
3117 	txd->compreq = 1;
3118 
3119 	if (vlan_has_tag(m)) {
3120 		sop->vtag_mode = 1;
3121 		sop->vtag = vlan_get_tag(m);
3122 	}
3123 
3124 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3125 		sop->offload_mode = VMXNET3_OM_TSO;
3126 		sop->hlen = start;
3127 		sop->offload_pos = m->m_pkthdr.segsz;
3128 	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
3129 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
3130 		sop->offload_mode = VMXNET3_OM_CSUM;
3131 		sop->hlen = start;
3132 		sop->offload_pos = csum_start;
3133 	}
3134 
3135 	/* Finally, change the ownership. */
3136 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
3137 	sop->gen ^= 1;
3138 
3139 	txq->vxtxq_ts->npending += nsegs;
3140 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
3141 		struct vmxnet3_queue *vmxq;
3142 		vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3143 		txq->vxtxq_ts->npending = 0;
3144 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(vmxq->vxq_id),
3145 		    txr->vxtxr_head);
3146 	}
3147 
3148 	return (0);
3149 }
3150 
3151 #define VMXNET3_TX_START 1
3152 #define VMXNET3_TX_TRANSMIT 2
3153 static inline void
3154 vmxnet3_tx_common_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq, int txtype)
3155 {
3156 	struct vmxnet3_softc *sc;
3157 	struct vmxnet3_txring *txr;
3158 	struct mbuf *m_head;
3159 	int tx;
3160 
3161 	sc = ifp->if_softc;
3162 	txr = &txq->vxtxq_cmd_ring;
3163 	tx = 0;
3164 
3165 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3166 
3167 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3168 	    sc->vmx_link_active == 0)
3169 		return;
3170 
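	/*
	 * Peek at the next packet first so it stays queued when the Tx
	 * ring cannot hold a maximally fragmented packet; dequeue it only
	 * once the ring has room.
	 */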
3171 	for (;;) {
3172 		if (txtype == VMXNET3_TX_START)
3173 			IFQ_POLL(&ifp->if_snd, m_head);
3174 		else
3175 			m_head = pcq_peek(txq->vxtxq_interq);
3176 		if (m_head == NULL)
3177 			break;
3178 
3179 		if (vmxnet3_txring_avail(txr) < VMXNET3_TX_MAXSEGS)
3180 			break;
3181 
3182 		if (txtype == VMXNET3_TX_START)
3183 			IFQ_DEQUEUE(&ifp->if_snd, m_head);
3184 		else
3185 			m_head = pcq_get(txq->vxtxq_interq);
3186 		if (m_head == NULL)
3187 			break;
3188 
3189 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3190 			if (m_head != NULL)
3191 				m_freem(m_head);
3192 			break;
3193 		}
3194 
3195 		tx++;
3196 		bpf_mtap(ifp, m_head, BPF_D_OUT);
3197 	}
3198 
3199 	if (tx > 0)
3200 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3201 }
3202 
3203 static void
3204 vmxnet3_start_locked(struct ifnet *ifp)
3205 {
3206 	struct vmxnet3_softc *sc;
3207 	struct vmxnet3_txqueue *txq;
3208 
3209 	sc = ifp->if_softc;
3210 	txq = &sc->vmx_queue[0].vxq_txqueue;
3211 
3212 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_START);
3213 }
3214 
3215 void
3216 vmxnet3_start(struct ifnet *ifp)
3217 {
3218 	struct vmxnet3_softc *sc;
3219 	struct vmxnet3_txqueue *txq;
3220 
3221 	sc = ifp->if_softc;
3222 	txq = &sc->vmx_queue[0].vxq_txqueue;
3223 
3224 	VMXNET3_TXQ_LOCK(txq);
3225 	vmxnet3_start_locked(ifp);
3226 	VMXNET3_TXQ_UNLOCK(txq);
3227 }
3228 
3229 static int
3230 vmxnet3_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
3231 {
3232 	struct vmxnet3_softc *sc;
3233 	u_int cpuid;
3234 
3235 	sc = ifp->if_softc;
3236 	cpuid = cpu_index(curcpu());
3237 	/*
3238 	 * Future work:
3239 	 * We should select the txqueue so that the load is evened out even
3240 	 * when ncpu differs from sc->vmx_ntxqueues. Currently the load is
3241 	 * uneven; for example, with six CPUs and four txqueues, vmx_queue[0]
3242 	 * and vmx_queue[1] carry more load than vmx_queue[2] and vmx_queue[3]
3243 	 * because CPU#4 always uses vmx_queue[0] and CPU#5 always uses
3244 	 * vmx_queue[1].
3245 	 * Furthermore, we should not use a random value to select the txqueue,
3246 	 * to avoid reordering; we should use the flow information in the mbuf.
3247 	 */
3248 	return cpuid % sc->vmx_ntxqueues;
3249 }
3250 
3251 static void
3252 vmxnet3_transmit_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq)
3253 {
3254 
3255 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_TRANSMIT);
3256 }
3257 
3258 static int
3259 vmxnet3_transmit(struct ifnet *ifp, struct mbuf *m)
3260 {
3261 	struct vmxnet3_softc *sc;
3262 	struct vmxnet3_txqueue *txq;
3263 	int qid;
3264 
3265 	qid = vmxnet3_select_txqueue(ifp, m);
3266 	sc = ifp->if_softc;
3267 	txq = &sc->vmx_queue[qid].vxq_txqueue;
3268 
3269 	if (__predict_false(!pcq_put(txq->vxtxq_interq, m))) {
3270 		VMXNET3_TXQ_LOCK(txq);
3271 		txq->vxtxq_pcqdrop.ev_count++;
3272 		VMXNET3_TXQ_UNLOCK(txq);
3273 		m_freem(m);
3274 		return ENOBUFS;
3275 	}
3276 
3277 #ifdef VMXNET3_ALWAYS_TXDEFER
3278 	kpreempt_disable();
3279 	softint_schedule(txq->vxtxq_si);
3280 	kpreempt_enable();
3281 #else
3282 	if (VMXNET3_TXQ_TRYLOCK(txq)) {
3283 		vmxnet3_transmit_locked(ifp, txq);
3284 		VMXNET3_TXQ_UNLOCK(txq);
3285 	} else {
3286 		kpreempt_disable();
3287 		softint_schedule(txq->vxtxq_si);
3288 		kpreempt_enable();
3289 	}
3290 #endif
3291 
3292 	return 0;
3293 }
3294 
3295 static void
3296 vmxnet3_deferred_transmit(void *arg)
3297 {
3298 	struct vmxnet3_txqueue *txq = arg;
3299 	struct vmxnet3_softc *sc = txq->vxtxq_sc;
3300 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3301 
3302 	VMXNET3_TXQ_LOCK(txq);
3303 	txq->vxtxq_transmitdef.ev_count++;
3304 	if (pcq_peek(txq->vxtxq_interq) != NULL)
3305 		vmxnet3_transmit_locked(ifp, txq);
3306 	VMXNET3_TXQ_UNLOCK(txq);
3307 }
3308 
3309 static void
3310 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3311 {
3312 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3313 	struct ethercom *ec = &sc->vmx_ethercom;
3314 	struct vmxnet3_driver_shared *ds = sc->vmx_ds;
3315 	struct ether_multi *enm;
3316 	struct ether_multistep step;
3317 	u_int mode;
3318 	uint8_t *p;
3319 
3320 	ds->mcast_tablelen = 0;
3321 	ETHER_LOCK(ec);
3322 	CLR(ec->ec_flags, ETHER_F_ALLMULTI);
3323 	ETHER_UNLOCK(ec);
3324 
3325 	/*
3326 	 * Always accept broadcast frames.
3327 	 * Always accept frames destined to our station address.
3328 	 */
3329 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
3330 
3331 	ETHER_LOCK(ec);
3332 	if (ISSET(ifp->if_flags, IFF_PROMISC) ||
3333 	    ec->ec_multicnt > VMXNET3_MULTICAST_MAX)
3334 		goto allmulti;
3335 
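	/* Copy each multicast address into the shared filter table. */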
3336 	p = sc->vmx_mcast;
3337 	ETHER_FIRST_MULTI(step, ec, enm);
3338 	while (enm != NULL) {
3339 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
3340 			/*
3341 			 * We must listen to a range of multicast addresses.
3342 			 * For now, just accept all multicasts, rather than
3343 			 * trying to set only those filter bits needed to match
3344 			 * the range.  (At this time, the only use of address
3345 			 * ranges is for IP multicast routing, for which the
3346 			 * range is big enough to require all bits set.)
3347 			 */
3348 			goto allmulti;
3349 		}
3350 		memcpy(p, enm->enm_addrlo, ETHER_ADDR_LEN);
3351 
3352 		p += ETHER_ADDR_LEN;
3353 
3354 		ETHER_NEXT_MULTI(step, enm);
3355 	}
3356 
3357 	if (ec->ec_multicnt > 0) {
3358 		SET(mode, VMXNET3_RXMODE_MCAST);
3359 		ds->mcast_tablelen = p - sc->vmx_mcast;
3360 	}
3361 	ETHER_UNLOCK(ec);
3362 
3363 	goto setit;
3364 
3365 allmulti:
3366 	SET(ec->ec_flags, ETHER_F_ALLMULTI);
3367 	ETHER_UNLOCK(ec);
3368 	SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
3369 	if (ifp->if_flags & IFF_PROMISC)
3370 		SET(mode, VMXNET3_RXMODE_PROMISC);
3371 
3372 setit:
3373 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3374 	ds->rxmode = mode;
3375 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3376 }
3377 
3378 static int
3379 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
3380 {
3381 	struct vmxnet3_softc *sc = ifp->if_softc;
3382 	struct ifreq *ifr = (struct ifreq *)data;
3383 	int s, error = 0;
3384 
3385 	switch (cmd) {
3386 	case SIOCSIFMTU: {
3387 		int nmtu = ifr->ifr_mtu;
3388 
3389 		if (nmtu < VMXNET3_MIN_MTU || nmtu > VMXNET3_MAX_MTU) {
3390 			error = EINVAL;
3391 			break;
3392 		}
3393 		if (ifp->if_mtu != (uint64_t)nmtu) {
3394 			s = splnet();
3395 			error = ether_ioctl(ifp, cmd, data);
3396 			splx(s);
3397 			if (error == ENETRESET)
3398 				error = vmxnet3_init(ifp);
3399 		}
3400 		break;
3401 	}
3402 
3403 	default:
3404 		s = splnet();
3405 		error = ether_ioctl(ifp, cmd, data);
3406 		splx(s);
3407 	}
3408 
3409 	if (error == ENETRESET) {
3410 		VMXNET3_CORE_LOCK(sc);
3411 		if (ifp->if_flags & IFF_RUNNING)
3412 			vmxnet3_set_rxfilter(sc);
3413 		VMXNET3_CORE_UNLOCK(sc);
3414 		error = 0;
3415 	}
3416 
3417 	return error;
3418 }
3419 
3420 static int
3421 vmxnet3_ifflags_cb(struct ethercom *ec)
3422 {
3423 	struct vmxnet3_softc *sc;
3424 
3425 	sc = ec->ec_if.if_softc;
3426 
3427 	VMXNET3_CORE_LOCK(sc);
3428 	vmxnet3_set_rxfilter(sc);
3429 	VMXNET3_CORE_UNLOCK(sc);
3430 
3431 	vmxnet3_if_link_status(sc);
3432 
3433 	return 0;
3434 }
3435 
3436 static int
3437 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3438 {
3439 	struct vmxnet3_softc *sc;
3440 	struct vmxnet3_queue *vmxq;
3441 
3442 	sc = txq->vxtxq_sc;
3443 	vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3444 
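	/*
	 * The per-queue watchdog counter is armed when packets are queued
	 * and cleared once the ring drains; if it counts down to zero here
	 * the queue is considered hung and the caller reinitializes the
	 * device.
	 */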
3445 	VMXNET3_TXQ_LOCK(txq);
3446 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3447 		VMXNET3_TXQ_UNLOCK(txq);
3448 		return (0);
3449 	}
3450 	txq->vxtxq_watchdogto.ev_count++;
3451 	VMXNET3_TXQ_UNLOCK(txq);
3452 
3453 	device_printf(sc->vmx_dev, "watchdog timeout on queue %d\n",
3454 	    vmxq->vxq_id);
3455 	return (1);
3456 }
3457 
3458 static void
3459 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3460 {
3461 
3462 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3463 }
3464 
3465 static void
3466 vmxnet3_tick(void *xsc)
3467 {
3468 	struct vmxnet3_softc *sc;
3469 	int i, timedout;
3470 
3471 	sc = xsc;
3472 	timedout = 0;
3473 
3474 	VMXNET3_CORE_LOCK(sc);
3475 
3476 	vmxnet3_refresh_host_stats(sc);
3477 
3478 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3479 		timedout |= vmxnet3_watchdog(&sc->vmx_queue[i].vxq_txqueue);
3480 
3481 	if (timedout != 0)
3482 		vmxnet3_init_locked(sc);
3483 	else
3484 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3485 
3486 	VMXNET3_CORE_UNLOCK(sc);
3487 }
3488 
3489 /*
3490  * Update the link state of the ifnet and the softc.
3491  */
3492 static void
3493 vmxnet3_if_link_status(struct vmxnet3_softc *sc)
3494 {
3495 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3496 	u_int link;
3497 	bool up;
3498 
3499 	up = vmxnet3_cmd_link_status(ifp);
3500 	if (up) {
3501 		sc->vmx_link_active = 1;
3502 		link = LINK_STATE_UP;
3503 	} else {
3504 		sc->vmx_link_active = 0;
3505 		link = LINK_STATE_DOWN;
3506 	}
3507 
3508 	if_link_state_change(ifp, link);
3509 }
3510 
3511 /*
3512  * Check the vmx(4) link state via VMXNET3_CMD and update ifp->if_baudrate.
3513  *   returns
3514  *       - true:  link up
3515  *       - false: link down
3516  */
3517 static bool
3518 vmxnet3_cmd_link_status(struct ifnet *ifp)
3519 {
3520 	struct vmxnet3_softc *sc = ifp->if_softc;
3521 	u_int x, speed;
3522 
3523 	x = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3524 	if ((x & 1) == 0)
3525 		return false;
3526 
3527 	speed = x >> 16;
3528 	ifp->if_baudrate = IF_Mbps(speed);
3529 	return true;
3530 }
3531 
3532 static void
3533 vmxnet3_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3534 {
3535 	bool up;
3536 
3537 	ifmr->ifm_status = IFM_AVALID;
3538 	ifmr->ifm_active = IFM_ETHER;
3539 
3540 	up = vmxnet3_cmd_link_status(ifp);
3541 	if (!up)
3542 		return;
3543 
3544 	ifmr->ifm_status |= IFM_ACTIVE;
3545 
3546 	if (ifp->if_baudrate >= IF_Gbps(10ULL))
3547 		ifmr->ifm_active |= IFM_10G_T;
3548 }
3549 
3550 static int
3551 vmxnet3_ifmedia_change(struct ifnet *ifp)
3552 {
3553 	return 0;
3554 }
3555 
3556 static void
3557 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3558 {
3559 	uint32_t ml, mh;
3560 
3561 	ml  = sc->vmx_lladdr[0];
3562 	ml |= sc->vmx_lladdr[1] << 8;
3563 	ml |= sc->vmx_lladdr[2] << 16;
3564 	ml |= sc->vmx_lladdr[3] << 24;
3565 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3566 
3567 	mh  = sc->vmx_lladdr[4];
3568 	mh |= sc->vmx_lladdr[5] << 8;
3569 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3570 }
3571 
3572 static void
3573 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3574 {
3575 	uint32_t ml, mh;
3576 
3577 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3578 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3579 
3580 	sc->vmx_lladdr[0] = ml;
3581 	sc->vmx_lladdr[1] = ml >> 8;
3582 	sc->vmx_lladdr[2] = ml >> 16;
3583 	sc->vmx_lladdr[3] = ml >> 24;
3584 	sc->vmx_lladdr[4] = mh;
3585 	sc->vmx_lladdr[5] = mh >> 8;
3586 }
3587 
3588 static void
3589 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3590 {
3591 	int i;
3592 
3593 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3594 	for (i = 0; i < sc->vmx_nintrs; i++)
3595 		vmxnet3_enable_intr(sc, i);
3596 }
3597 
3598 static void
3599 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3600 {
3601 	int i;
3602 
3603 	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3604 	for (i = 0; i < sc->vmx_nintrs; i++)
3605 		vmxnet3_disable_intr(sc, i);
3606 }
3607 
3608 static int
3609 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3610     struct vmxnet3_dma_alloc *dma)
3611 {
3612 	bus_dma_tag_t t = sc->vmx_dmat;
3613 	bus_dma_segment_t *segs = dma->dma_segs;
3614 	int n, error;
3615 
3616 	memset(dma, 0, sizeof(*dma));
3617 
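	/*
	 * Standard bus_dma(9) sequence: allocate a single physically
	 * contiguous segment, map it into kernel virtual address space,
	 * create a DMA map and load it, unwinding each step on failure.
	 */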
3618 	error = bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT);
3619 	if (error) {
3620 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_alloc failed: %d\n", error);
3621 		goto fail1;
3622 	}
3623 	KASSERT(n == 1);
3624 
3625 	error = bus_dmamem_map(t, segs, 1, size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
3626 	if (error) {
3627 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_map failed: %d\n", error);
3628 		goto fail2;
3629 	}
3630 
3631 	error = bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
3632 	if (error) {
3633 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_create failed: %d\n", error);
3634 		goto fail3;
3635 	}
3636 
3637 	error = bus_dmamap_load(t, dma->dma_map, dma->dma_vaddr, size, NULL,
3638 	    BUS_DMA_NOWAIT);
3639 	if (error) {
3640 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_load failed: %d\n", error);
3641 		goto fail4;
3642 	}
3643 
3644 	memset(dma->dma_vaddr, 0, size);
3645 	dma->dma_paddr = DMAADDR(dma->dma_map);
3646 	dma->dma_size = size;
3647 
3648 	return (0);
3649 fail4:
3650 	bus_dmamap_destroy(t, dma->dma_map);
3651 fail3:
3652 	bus_dmamem_unmap(t, dma->dma_vaddr, size);
3653 fail2:
3654 	bus_dmamem_free(t, segs, 1);
3655 fail1:
3656 	return (error);
3657 }
3658 
3659 static void
3660 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3661 {
3662 	bus_dma_tag_t t = sc->vmx_dmat;
3663 
3664 	bus_dmamap_unload(t, dma->dma_map);
3665 	bus_dmamap_destroy(t, dma->dma_map);
3666 	bus_dmamem_unmap(t, dma->dma_vaddr, dma->dma_size);
3667 	bus_dmamem_free(t, dma->dma_segs, 1);
3668 
3669 	memset(dma, 0, sizeof(*dma));
3670 }
3671 
3672 MODULE(MODULE_CLASS_DRIVER, if_vmx, "pci");
3673 
3674 #ifdef _MODULE
3675 #include "ioconf.c"
3676 #endif
3677 
3678 static int
3679 if_vmx_modcmd(modcmd_t cmd, void *opaque)
3680 {
3681 	int error = 0;
3682 
3683 	switch (cmd) {
3684 	case MODULE_CMD_INIT:
3685 #ifdef _MODULE
3686 		error = config_init_component(cfdriver_ioconf_if_vmx,
3687 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3688 #endif
3689 		return error;
3690 	case MODULE_CMD_FINI:
3691 #ifdef _MODULE
3692 		error = config_fini_component(cfdriver_ioconf_if_vmx,
3693 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3694 #endif
3695 		return error;
3696 	default:
3697 		return ENOTTY;
3698 	}
3699 }
3700 
3701