xref: /netbsd-src/sys/dev/pci/if_vmx.c (revision 627f7eb200a4419d89b531d55fccd2ee3ffdcde0)
1 /*	$NetBSD: if_vmx.c,v 1.4 2020/10/15 04:37:48 ryo Exp $	*/
2 /*	$OpenBSD: if_vmx.c,v 1.16 2014/01/22 06:04:17 brad Exp $	*/
3 
4 /*
5  * Copyright (c) 2013 Tsubai Masanari
6  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __KERNEL_RCSID(0, "$NetBSD: if_vmx.c,v 1.4 2020/10/15 04:37:48 ryo Exp $");
23 
24 #include <sys/param.h>
25 #include <sys/cpu.h>
26 #include <sys/kernel.h>
27 #include <sys/kmem.h>
28 #include <sys/bitops.h>
29 #include <sys/bus.h>
30 #include <sys/device.h>
31 #include <sys/mbuf.h>
32 #include <sys/module.h>
33 #include <sys/sockio.h>
34 #include <sys/pcq.h>
35 #include <sys/workqueue.h>
36 #include <sys/interrupt.h>
37 
38 #include <net/bpf.h>
39 #include <net/if.h>
40 #include <net/if_ether.h>
41 #include <net/if_media.h>
42 
43 #include <netinet/if_inarp.h>
44 #include <netinet/in_systm.h>	/* for <netinet/ip.h> */
45 #include <netinet/in.h>		/* for <netinet/ip.h> */
46 #include <netinet/ip.h>		/* for struct ip */
47 #include <netinet/ip6.h>	/* for struct ip6_hdr */
48 #include <netinet/tcp.h>	/* for struct tcphdr */
49 #include <netinet/udp.h>	/* for struct udphdr */
50 
51 #include <dev/pci/pcivar.h>
52 #include <dev/pci/pcireg.h>
53 #include <dev/pci/pcidevs.h>
54 
55 #include <dev/pci/if_vmxreg.h>
56 
57 #define VMXNET3_DRIVER_VERSION 0x00010000
58 
59 /*
60  * Max descriptors per Tx packet. We must limit the size of the
61  * any TSO packets based on the number of segments.
62  */
63 #define VMXNET3_TX_MAXSEGS		32
64 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
65 
66 /*
67  * Maximum supported Tx segment size. The length field in the
68  * Tx descriptor is 14 bits.
69  */
70 #define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
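/*
 * Note: (1 << 14) == 16384.  The device presumably treats a descriptor
 * length of 0 as the full 16384 bytes, which is why the limit is
 * 1 << 14 rather than (1 << 14) - 1.
 */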
71 
72 /*
73  * The maximum number of Rx segments we accept.
74  */
75 #define VMXNET3_MAX_RX_SEGS		0	/* no segments */
76 
77 /*
78  * Predetermined size of the multicast MAC filter table. If the
79  * number of multicast addresses exceeds this size, then the
80  * ALL_MULTI mode is used instead.
81  */
82 #define VMXNET3_MULTICAST_MAX		32
83 
84 /*
85  * Our Tx watchdog timeout.
86  */
87 #define VMXNET3_WATCHDOG_TIMEOUT	5
88 
89 /*
90  * Default value for vmx_intr_{rx,tx}_process_limit, the maximum
91  * number of packets to process in the interrupt handler.
92  */
93 #define VMXNET3_RX_INTR_PROCESS_LIMIT 0U
94 #define VMXNET3_TX_INTR_PROCESS_LIMIT 256
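/*
 * An Rx limit of 0 presumably means the interrupt handler itself
 * processes no packets and immediately defers all Rx work to the
 * softint/workqueue path.
 */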
95 
96 /*
97  * Default value for vmx_{rx,tx}_process_limit, the maximum number
98  * of packets to process during deferred processing.
99  */
100 #define VMXNET3_RX_PROCESS_LIMIT 256
101 #define VMXNET3_TX_PROCESS_LIMIT 256
102 
103 #define VMXNET3_WORKQUEUE_PRI PRI_SOFTNET
104 
105 /*
106  * IP protocols that we can perform Tx checksum offloading of.
107  */
108 #define VMXNET3_CSUM_OFFLOAD \
109     (M_CSUM_TCPv4 | M_CSUM_UDPv4)
110 #define VMXNET3_CSUM_OFFLOAD_IPV6 \
111     (M_CSUM_TCPv6 | M_CSUM_UDPv6)
112 
113 #define VMXNET3_CSUM_ALL_OFFLOAD \
114     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)
115 
116 #define VMXNET3_RXRINGS_PERQ 2
117 
118 #define VMXNET3_CORE_LOCK(_sc)		mutex_enter((_sc)->vmx_mtx)
119 #define VMXNET3_CORE_UNLOCK(_sc)	mutex_exit((_sc)->vmx_mtx)
120 #define VMXNET3_CORE_LOCK_ASSERT(_sc)	mutex_owned((_sc)->vmx_mtx)
121 
122 #define VMXNET3_RXQ_LOCK(_rxq)		mutex_enter((_rxq)->vxrxq_mtx)
123 #define VMXNET3_RXQ_UNLOCK(_rxq)	mutex_exit((_rxq)->vxrxq_mtx)
124 #define VMXNET3_RXQ_LOCK_ASSERT(_rxq)		\
125     mutex_owned((_rxq)->vxrxq_mtx)
126 
127 #define VMXNET3_TXQ_LOCK(_txq)		mutex_enter((_txq)->vxtxq_mtx)
128 #define VMXNET3_TXQ_TRYLOCK(_txq)	mutex_tryenter((_txq)->vxtxq_mtx)
129 #define VMXNET3_TXQ_UNLOCK(_txq)	mutex_exit((_txq)->vxtxq_mtx)
130 #define VMXNET3_TXQ_LOCK_ASSERT(_txq)		\
131     mutex_owned((_txq)->vxtxq_mtx)
132 
133 struct vmxnet3_dma_alloc {
134 	bus_addr_t dma_paddr;
135 	void *dma_vaddr;
136 	bus_dmamap_t dma_map;
137 	bus_size_t dma_size;
138 	bus_dma_segment_t dma_segs[1];
139 };
140 
141 struct vmxnet3_txbuf {
142 	bus_dmamap_t vtxb_dmamap;
143 	struct mbuf *vtxb_m;
144 };
145 
146 struct vmxnet3_txring {
147 	struct vmxnet3_txbuf *vxtxr_txbuf;
148 	struct vmxnet3_txdesc *vxtxr_txd;
149 	u_int vxtxr_head;
150 	u_int vxtxr_next;
151 	u_int vxtxr_ndesc;
152 	int vxtxr_gen;
153 	struct vmxnet3_dma_alloc vxtxr_dma;
154 };
155 
156 struct vmxnet3_rxbuf {
157 	bus_dmamap_t vrxb_dmamap;
158 	struct mbuf *vrxb_m;
159 };
160 
161 struct vmxnet3_rxring {
162 	struct vmxnet3_rxbuf *vxrxr_rxbuf;
163 	struct vmxnet3_rxdesc *vxrxr_rxd;
164 	u_int vxrxr_fill;
165 	u_int vxrxr_ndesc;
166 	int vxrxr_gen;
167 	int vxrxr_rid;
168 	struct vmxnet3_dma_alloc vxrxr_dma;
169 	bus_dmamap_t vxrxr_spare_dmap;
170 };
171 
172 struct vmxnet3_comp_ring {
173 	union {
174 		struct vmxnet3_txcompdesc *txcd;
175 		struct vmxnet3_rxcompdesc *rxcd;
176 	} vxcr_u;
177 	u_int vxcr_next;
178 	u_int vxcr_ndesc;
179 	int vxcr_gen;
180 	struct vmxnet3_dma_alloc vxcr_dma;
181 };
182 
183 struct vmxnet3_txq_stats {
184 #if 0
185 	uint64_t vmtxs_opackets;	/* if_opackets */
186 	uint64_t vmtxs_obytes;		/* if_obytes */
187 	uint64_t vmtxs_omcasts;		/* if_omcasts */
188 #endif
189 	uint64_t vmtxs_csum;
190 	uint64_t vmtxs_tso;
191 	uint64_t vmtxs_full;
192 	uint64_t vmtxs_offload_failed;
193 };
194 
195 struct vmxnet3_txqueue {
196 	kmutex_t *vxtxq_mtx;
197 	struct vmxnet3_softc *vxtxq_sc;
198 	int vxtxq_watchdog;
199 	pcq_t *vxtxq_interq;
200 	struct vmxnet3_txring vxtxq_cmd_ring;
201 	struct vmxnet3_comp_ring vxtxq_comp_ring;
202 	struct vmxnet3_txq_stats vxtxq_stats;
203 	struct vmxnet3_txq_shared *vxtxq_ts;
204 	char vxtxq_name[16];
205 
206 	void *vxtxq_si;
207 
208 	struct evcnt vxtxq_intr;
209 	struct evcnt vxtxq_defer;
210 	struct evcnt vxtxq_deferreq;
211 	struct evcnt vxtxq_pcqdrop;
212 	struct evcnt vxtxq_transmitdef;
213 	struct evcnt vxtxq_watchdogto;
214 	struct evcnt vxtxq_defragged;
215 	struct evcnt vxtxq_defrag_failed;
216 };
217 
218 #if 0
219 struct vmxnet3_rxq_stats {
220 	uint64_t vmrxs_ipackets;	/* if_ipackets */
221 	uint64_t vmrxs_ibytes;		/* if_ibytes */
222 	uint64_t vmrxs_iqdrops;		/* if_iqdrops */
223 	uint64_t vmrxs_ierrors;		/* if_ierrors */
224 };
225 #endif
226 
227 struct vmxnet3_rxqueue {
228 	kmutex_t *vxrxq_mtx;
229 	struct vmxnet3_softc *vxrxq_sc;
230 	struct mbuf *vxrxq_mhead;
231 	struct mbuf *vxrxq_mtail;
232 	struct vmxnet3_rxring vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
233 	struct vmxnet3_comp_ring vxrxq_comp_ring;
234 #if 0
235 	struct vmxnet3_rxq_stats vxrxq_stats;
236 #endif
237 	struct vmxnet3_rxq_shared *vxrxq_rs;
238 	char vxrxq_name[16];
239 
240 	struct evcnt vxrxq_intr;
241 	struct evcnt vxrxq_defer;
242 	struct evcnt vxrxq_deferreq;
243 	struct evcnt vxrxq_mgetcl_failed;
244 	struct evcnt vxrxq_mbuf_load_failed;
245 };
246 
247 struct vmxnet3_queue {
248 	int vxq_id;
249 	int vxq_intr_idx;
250 
251 	struct vmxnet3_txqueue vxq_txqueue;
252 	struct vmxnet3_rxqueue vxq_rxqueue;
253 
254 	void *vxq_si;
255 	bool vxq_workqueue;
256 	struct work vxq_wq_cookie;
257 };
258 
259 struct vmxnet3_softc {
260 	device_t vmx_dev;
261 	struct ethercom vmx_ethercom;
262 	struct ifmedia vmx_media;
263 	struct vmxnet3_driver_shared *vmx_ds;
264 	int vmx_flags;
265 #define VMXNET3_FLAG_NO_MSIX	(1 << 0)
266 #define VMXNET3_FLAG_RSS	(1 << 1)
267 #define VMXNET3_FLAG_ATTACHED	(1 << 2)
268 
269 	struct vmxnet3_queue *vmx_queue;
270 
271 	struct pci_attach_args *vmx_pa;
272 	pci_chipset_tag_t vmx_pc;
273 
274 	bus_space_tag_t vmx_iot0;
275 	bus_space_tag_t vmx_iot1;
276 	bus_space_handle_t vmx_ioh0;
277 	bus_space_handle_t vmx_ioh1;
278 	bus_size_t vmx_ios0;
279 	bus_size_t vmx_ios1;
280 	bus_dma_tag_t vmx_dmat;
281 
282 	int vmx_link_active;
283 	int vmx_ntxqueues;
284 	int vmx_nrxqueues;
285 	int vmx_ntxdescs;
286 	int vmx_nrxdescs;
287 	int vmx_max_rxsegs;
288 
289 	struct evcnt vmx_event_intr;
290 	struct evcnt vmx_event_link;
291 	struct evcnt vmx_event_txqerror;
292 	struct evcnt vmx_event_rxqerror;
293 	struct evcnt vmx_event_dic;
294 	struct evcnt vmx_event_debug;
295 
296 	int vmx_intr_type;
297 	int vmx_intr_mask_mode;
298 	int vmx_event_intr_idx;
299 	int vmx_nintrs;
300 	pci_intr_handle_t *vmx_intrs;	/* legacy use vmx_intrs[0] */
301 	void *vmx_ihs[VMXNET3_MAX_INTRS];
302 
303 	kmutex_t *vmx_mtx;
304 
305 	uint8_t *vmx_mcast;
306 	void *vmx_qs;
307 	struct vmxnet3_rss_shared *vmx_rss;
308 	callout_t vmx_tick;
309 	struct vmxnet3_dma_alloc vmx_ds_dma;
310 	struct vmxnet3_dma_alloc vmx_qs_dma;
311 	struct vmxnet3_dma_alloc vmx_mcast_dma;
312 	struct vmxnet3_dma_alloc vmx_rss_dma;
313 	int vmx_max_ntxqueues;
314 	int vmx_max_nrxqueues;
315 	uint8_t vmx_lladdr[ETHER_ADDR_LEN];
316 
317 	u_int vmx_rx_intr_process_limit;
318 	u_int vmx_tx_intr_process_limit;
319 	u_int vmx_rx_process_limit;
320 	u_int vmx_tx_process_limit;
321 	struct sysctllog *vmx_sysctllog;
322 
323 	bool vmx_txrx_workqueue;
324 	struct workqueue *vmx_queue_wq;
325 };
326 
327 #define VMXNET3_STAT
328 
329 #ifdef VMXNET3_STAT
330 struct {
331 	u_int txhead;
332 	u_int txdone;
333 	u_int maxtxlen;
334 	u_int rxdone;
335 	u_int rxfill;
336 	u_int intr;
337 } vmxstat;
338 #endif
339 
340 typedef enum {
341 	VMXNET3_BARRIER_RD,
342 	VMXNET3_BARRIER_WR,
343 	VMXNET3_BARRIER_RDWR,
344 } vmxnet3_barrier_t;
345 
346 #define JUMBO_LEN (MCLBYTES - ETHER_ALIGN)	/* XXX */
347 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
348 
349 #define vtophys(va) 0		/* XXX ok? */
350 
351 static int vmxnet3_match(device_t, cfdata_t, void *);
352 static void vmxnet3_attach(device_t, device_t, void *);
353 static int vmxnet3_detach(device_t, int);
354 
355 static int vmxnet3_alloc_pci_resources(struct vmxnet3_softc *);
356 static void vmxnet3_free_pci_resources(struct vmxnet3_softc *);
357 static int vmxnet3_check_version(struct vmxnet3_softc *);
358 static void vmxnet3_check_multiqueue(struct vmxnet3_softc *);
359 
360 static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
361 static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
362 static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
363 static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
364 static void vmxnet3_free_interrupts(struct vmxnet3_softc *);
365 
366 static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
367 static int vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *);
368 static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
369 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
370 static int vmxnet3_setup_interrupts(struct vmxnet3_softc *);
371 static int vmxnet3_setup_sysctl(struct vmxnet3_softc *);
372 
373 static int vmxnet3_setup_stats(struct vmxnet3_softc *);
374 static void vmxnet3_teardown_stats(struct vmxnet3_softc *);
375 
376 static int vmxnet3_init_rxq(struct vmxnet3_softc *, int);
377 static int vmxnet3_init_txq(struct vmxnet3_softc *, int);
378 static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
379 static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
380 static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
381 static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
382 
383 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
384 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
385 static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
386 static void vmxnet3_free_txq_data(struct vmxnet3_softc *);
387 static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
388 static void vmxnet3_free_rxq_data(struct vmxnet3_softc *);
389 static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
390 static void vmxnet3_free_queue_data(struct vmxnet3_softc *);
391 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
392 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
393 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
394 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
395 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
396 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
397 static void vmxnet3_free_data(struct vmxnet3_softc *);
398 static int vmxnet3_setup_interface(struct vmxnet3_softc *);
399 
400 static void vmxnet3_evintr(struct vmxnet3_softc *);
401 static bool vmxnet3_txq_eof(struct vmxnet3_txqueue *, u_int);
402 static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxqueue *,
403     struct vmxnet3_rxring *);
404 static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
405     struct vmxnet3_rxring *, int);
406 static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *);
407 static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
408 static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *,
409     struct vmxnet3_rxcompdesc *, struct mbuf *);
410 static bool vmxnet3_rxq_eof(struct vmxnet3_rxqueue *, u_int);
411 static int vmxnet3_legacy_intr(void *);
412 static int vmxnet3_txrxq_intr(void *);
413 static void vmxnet3_handle_queue(void *);
414 static void vmxnet3_handle_queue_work(struct work *, void *);
415 static int vmxnet3_event_intr(void *);
416 
417 static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
418 static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
419 static void vmxnet3_stop_locked(struct vmxnet3_softc *);
420 static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *);
421 static void vmxnet3_stop(struct ifnet *, int);
422 
423 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
424 static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
425 static int vmxnet3_reinit_queues(struct vmxnet3_softc *);
426 static int vmxnet3_enable_device(struct vmxnet3_softc *);
427 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
428 static int vmxnet3_reinit(struct vmxnet3_softc *);
429 
430 static int vmxnet3_init_locked(struct vmxnet3_softc *);
431 static int vmxnet3_init(struct ifnet *);
432 
433 static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *, int *, int *);
434 static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t);
435 static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
436 static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
437 static void vmxnet3_start_locked(struct ifnet *);
438 static void vmxnet3_start(struct ifnet *);
439 static void vmxnet3_transmit_locked(struct ifnet *, struct vmxnet3_txqueue *);
440 static int vmxnet3_transmit(struct ifnet *, struct mbuf *);
441 static void vmxnet3_deferred_transmit(void *);
442 
443 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *);
444 static int vmxnet3_ioctl(struct ifnet *, u_long, void *);
445 static int vmxnet3_ifflags_cb(struct ethercom *);
446 
447 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
448 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
449 static void vmxnet3_tick(void *);
450 static void vmxnet3_if_link_status(struct vmxnet3_softc *);
451 static bool vmxnet3_cmd_link_status(struct ifnet *);
452 static void vmxnet3_ifmedia_status(struct ifnet *, struct ifmediareq *);
453 static int vmxnet3_ifmedia_change(struct ifnet *);
454 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
455 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
456 
457 static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
458 static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
459 
460 static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t,
461     struct vmxnet3_dma_alloc *);
462 static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *);
463 
464 CFATTACH_DECL3_NEW(vmx, sizeof(struct vmxnet3_softc),
465     vmxnet3_match, vmxnet3_attach, vmxnet3_detach, NULL, NULL, NULL, 0);
466 
467 /* round down to the nearest power of 2 */
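/*
 * e.g. vmxnet3_calc_queue_size(6): fls32(6) == 3, so the result is
 * 1 << 2 == 4; exact powers of two are returned unchanged.
 */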
468 static int
469 vmxnet3_calc_queue_size(int n)
470 {
471 
472 	if (__predict_false(n <= 0))
473 		return 1;
474 
475 	return (1U << (fls32(n) - 1));
476 }
477 
478 static inline void
479 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
480 {
481 
482 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
483 }
484 
485 static inline uint32_t
486 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
487 {
488 
489 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
490 }
491 
492 static inline void
493 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
494 {
495 
496 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
497 }
498 
499 static inline void
500 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
501 {
502 
503 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
504 }
505 
506 static inline uint32_t
507 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
508 {
509 
510 	vmxnet3_write_cmd(sc, cmd);
511 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
512 }
513 
514 static inline void
515 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
516 {
517 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
518 }
519 
520 static inline void
521 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
522 {
523 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
524 }
525 
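/*
 * Advance the Rx ring fill index.  The generation bit is toggled each
 * time the index wraps, so descriptors written in the new pass can be
 * distinguished from those of the previous pass.
 */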
526 static inline void
527 vmxnet3_rxr_increment_fill(struct vmxnet3_rxring *rxr)
528 {
529 
530 	if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
531 		rxr->vxrxr_fill = 0;
532 		rxr->vxrxr_gen ^= 1;
533 	}
534 }
535 
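/*
 * Number of free Tx descriptors.  One slot is always left unused so a
 * full ring can be told apart from an empty one; e.g. with 512
 * descriptors and vxtxr_head == vxtxr_next the result is 511.
 */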
536 static inline int
537 vmxnet3_txring_avail(struct vmxnet3_txring *txr)
538 {
539 	int avail = txr->vxtxr_next - txr->vxtxr_head - 1;
540 	return (avail < 0 ? (int)txr->vxtxr_ndesc + avail : avail);
541 }
542 
543 /*
544  * Since this is a purely paravirtualized device, we do not have
545  * to worry about DMA coherency. But at times, we must make sure
546  * both the compiler and CPU do not reorder memory operations.
547  */
548 static inline void
549 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
550 {
551 
552 	switch (type) {
553 	case VMXNET3_BARRIER_RD:
554 		membar_consumer();
555 		break;
556 	case VMXNET3_BARRIER_WR:
557 		membar_producer();
558 		break;
559 	case VMXNET3_BARRIER_RDWR:
560 		membar_sync();
561 		break;
562 	default:
563 		panic("%s: bad barrier type %d", __func__, type);
564 	}
565 }
566 
567 static int
568 vmxnet3_match(device_t parent, cfdata_t match, void *aux)
569 {
570 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
571 
572 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_VMWARE &&
573 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VMWARE_VMXNET3)
574 		return 1;
575 
576 	return 0;
577 }
578 
579 static void
580 vmxnet3_attach(device_t parent, device_t self, void *aux)
581 {
582 	struct vmxnet3_softc *sc = device_private(self);
583 	struct pci_attach_args *pa = aux;
584 	pcireg_t preg;
585 	int error;
586 	int candidate;
587 
588 	sc->vmx_dev = self;
589 	sc->vmx_pa = pa;
590 	sc->vmx_pc = pa->pa_pc;
591 	if (pci_dma64_available(pa))
592 		sc->vmx_dmat = pa->pa_dmat64;
593 	else
594 		sc->vmx_dmat = pa->pa_dmat;
595 
596 	pci_aprint_devinfo_fancy(pa, "Ethernet controller", "vmxnet3", 1);
597 
598 	preg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
599 	preg |= PCI_COMMAND_MASTER_ENABLE;
600 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, preg);
601 
602 	sc->vmx_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
603 	callout_init(&sc->vmx_tick, CALLOUT_MPSAFE);
604 
605 	candidate = MIN(MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES),
606 	    ncpu);
607 	sc->vmx_max_ntxqueues = sc->vmx_max_nrxqueues =
608 	    vmxnet3_calc_queue_size(candidate);
609 	sc->vmx_ntxdescs = 512;
610 	sc->vmx_nrxdescs = 256;
611 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
612 
613 	error = vmxnet3_alloc_pci_resources(sc);
614 	if (error)
615 		return;
616 
617 	error = vmxnet3_check_version(sc);
618 	if (error)
619 		return;
620 
621 	error = vmxnet3_alloc_rxtx_queues(sc);
622 	if (error)
623 		return;
624 
625 	error = vmxnet3_alloc_interrupts(sc);
626 	if (error)
627 		return;
628 
629 	vmxnet3_check_multiqueue(sc);
630 
631 	error = vmxnet3_alloc_data(sc);
632 	if (error)
633 		return;
634 
635 	error = vmxnet3_setup_interface(sc);
636 	if (error)
637 		return;
638 
639 	error = vmxnet3_setup_interrupts(sc);
640 	if (error)
641 		return;
642 
643 	error = vmxnet3_setup_sysctl(sc);
644 	if (error)
645 		return;
646 
647 	error = vmxnet3_setup_stats(sc);
648 	if (error)
649 		return;
650 
651 	sc->vmx_flags |= VMXNET3_FLAG_ATTACHED;
652 }
653 
654 static int
655 vmxnet3_detach(device_t self, int flags)
656 {
657 	struct vmxnet3_softc *sc;
658 	struct ifnet *ifp;
659 
660 	sc = device_private(self);
661 	ifp = &sc->vmx_ethercom.ec_if;
662 
663 	if (sc->vmx_flags & VMXNET3_FLAG_ATTACHED) {
664 		VMXNET3_CORE_LOCK(sc);
665 		vmxnet3_stop_locked(sc);
666 		callout_halt(&sc->vmx_tick, sc->vmx_mtx);
667 		callout_destroy(&sc->vmx_tick);
668 		VMXNET3_CORE_UNLOCK(sc);
669 
670 		ether_ifdetach(ifp);
671 		if_detach(ifp);
672 		ifmedia_fini(&sc->vmx_media);
673 	}
674 
675 	vmxnet3_teardown_stats(sc);
676 	sysctl_teardown(&sc->vmx_sysctllog);
677 
678 	vmxnet3_free_interrupts(sc);
679 
680 	vmxnet3_free_data(sc);
681 	vmxnet3_free_pci_resources(sc);
682 	vmxnet3_free_rxtx_queues(sc);
683 
684 	if (sc->vmx_mtx)
685 		mutex_obj_free(sc->vmx_mtx);
686 
687 	return (0);
688 }
689 
690 static int
691 vmxnet3_alloc_pci_resources(struct vmxnet3_softc *sc)
692 {
693 	struct pci_attach_args *pa = sc->vmx_pa;
694 	pcireg_t memtype;
695 
696 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
697 	if (pci_mapreg_map(pa, PCI_BAR(0), memtype, 0, &sc->vmx_iot0, &sc->vmx_ioh0,
698 	    NULL, &sc->vmx_ios0)) {
699 		aprint_error_dev(sc->vmx_dev, "failed to map BAR0\n");
700 		return (ENXIO);
701 	}
702 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(1));
703 	if (pci_mapreg_map(pa, PCI_BAR(1), memtype, 0, &sc->vmx_iot1, &sc->vmx_ioh1,
704 	    NULL, &sc->vmx_ios1)) {
705 		aprint_error_dev(sc->vmx_dev, "failed to map BAR1\n");
706 		return (ENXIO);
707 	}
708 
709 	if (!pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, NULL, NULL)) {
710 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
711 		return (0);
712 	}
713 
714 	return (0);
715 }
716 
717 static void
718 vmxnet3_free_pci_resources(struct vmxnet3_softc *sc)
719 {
720 
721 	if (sc->vmx_ios0) {
722 		bus_space_unmap(sc->vmx_iot0, sc->vmx_ioh0, sc->vmx_ios0);
723 		sc->vmx_ios0 = 0;
724 	}
725 
726 	if (sc->vmx_ios1) {
727 		bus_space_unmap(sc->vmx_iot1, sc->vmx_ioh1, sc->vmx_ios1);
728 		sc->vmx_ios1 = 0;
729 	}
730 }
731 
732 static int
733 vmxnet3_check_version(struct vmxnet3_softc *sc)
734 {
735 	u_int ver;
736 
737 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
738 	if ((ver & 0x1) == 0) {
739 		aprint_error_dev(sc->vmx_dev,
740 		    "unsupported hardware version 0x%x\n", ver);
741 		return (ENOTSUP);
742 	}
743 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
744 
745 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
746 	if ((ver & 0x1) == 0) {
747 		aprint_error_dev(sc->vmx_dev,
748 		    "incompatible UPT version 0x%x\n", ver);
749 		return (ENOTSUP);
750 	}
751 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
752 
753 	return (0);
754 }
755 
756 static void
757 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
758 {
759 
760 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
761 		goto out;
762 
763 	/* Just use the maximum configured for now. */
764 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
765 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
766 
767 	if (sc->vmx_nrxqueues > 1)
768 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
769 
770 	return;
771 
772 out:
773 	sc->vmx_ntxqueues = 1;
774 	sc->vmx_nrxqueues = 1;
775 }
776 
777 static int
778 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
779 {
780 	int required;
781 	struct pci_attach_args *pa = sc->vmx_pa;
782 
783 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
784 		return (1);
785 
786 	/* Allocate an additional vector for the events interrupt. */
787 	required = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues) + 1;
788 
789 	if (pci_msix_count(pa->pa_pc, pa->pa_tag) < required)
790 		return (1);
791 
792 	if (pci_msix_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
793 		sc->vmx_nintrs = required;
794 		return (0);
795 	}
796 
797 	return (1);
798 }
799 
800 static int
801 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
802 {
803 	int nmsi, required;
804 	struct pci_attach_args *pa = sc->vmx_pa;
805 
806 	required = 1;
807 
808 	nmsi = pci_msi_count(pa->pa_pc, pa->pa_tag);
809 	if (nmsi < required)
810 		return (1);
811 
812 	if (pci_msi_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
813 		sc->vmx_nintrs = required;
814 		return (0);
815 	}
816 
817 	return (1);
818 }
819 
820 static int
821 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
822 {
823 
824 	if (pci_intx_alloc(sc->vmx_pa, &sc->vmx_intrs) == 0) {
825 		sc->vmx_nintrs = 1;
826 		return (0);
827 	}
828 
829 	return (1);
830 }
831 
832 static int
833 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
834 {
835 	u_int config;
836 	int error;
837 
838 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
839 
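	/*
	 * The low two bits of the INTRCFG result select the interrupt type
	 * (auto, MSI-X, MSI or legacy); the next two bits select the
	 * interrupt mask mode.
	 */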
840 	sc->vmx_intr_type = config & 0x03;
841 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
842 
843 	switch (sc->vmx_intr_type) {
844 	case VMXNET3_IT_AUTO:
845 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
846 		/* FALLTHROUGH */
847 	case VMXNET3_IT_MSIX:
848 		error = vmxnet3_alloc_msix_interrupts(sc);
849 		if (error == 0)
850 			break;
851 		sc->vmx_intr_type = VMXNET3_IT_MSI;
852 		/* FALLTHROUGH */
853 	case VMXNET3_IT_MSI:
854 		error = vmxnet3_alloc_msi_interrupts(sc);
855 		if (error == 0)
856 			break;
857 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
858 		/* FALLTHROUGH */
859 	case VMXNET3_IT_LEGACY:
860 		error = vmxnet3_alloc_legacy_interrupts(sc);
861 		if (error == 0)
862 			break;
863 		/* FALLTHROUGH */
864 	default:
865 		sc->vmx_intr_type = -1;
866 		aprint_error_dev(sc->vmx_dev, "cannot allocate any interrupt resources\n");
867 		return (ENXIO);
868 	}
869 
870 	return (error);
871 }
872 
873 static void
874 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
875 {
876 	pci_chipset_tag_t pc = sc->vmx_pc;
877 	int i;
878 
879 	workqueue_destroy(sc->vmx_queue_wq);
880 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
881 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
882 
883 		softint_disestablish(vmxq->vxq_si);
884 		vmxq->vxq_si = NULL;
885 	}
886 	for (i = 0; i < sc->vmx_nintrs; i++) {
887 		pci_intr_disestablish(pc, sc->vmx_ihs[i]);
888 	}
889 	pci_intr_release(pc, sc->vmx_intrs, sc->vmx_nintrs);
890 }
891 
892 static int
893 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
894 {
895 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
896 	struct vmxnet3_queue *vmxq;
897 	pci_intr_handle_t *intr;
898 	void **ihs;
899 	int intr_idx, i, use_queues, error;
900 	kcpuset_t *affinity;
901 	const char *intrstr;
902 	char intrbuf[PCI_INTRSTR_LEN];
903 	char xnamebuf[32];
904 
905 	intr = sc->vmx_intrs;
906 	intr_idx = 0;
907 	ihs = sc->vmx_ihs;
908 
909 	/* See vmxnet3_alloc_msix_interrupts() */
910 	use_queues = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
911 	for (i = 0; i < use_queues; i++, intr++, ihs++, intr_idx++) {
912 		snprintf(xnamebuf, 32, "%s: txrx %d", device_xname(sc->vmx_dev), i);
913 
914 		vmxq = &sc->vmx_queue[i];
915 
916 		intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
917 
918 		pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
919 		*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
920 		    vmxnet3_txrxq_intr, vmxq, xnamebuf);
921 		if (*ihs == NULL) {
922 			aprint_error_dev(sc->vmx_dev,
923 			    "unable to establish txrx interrupt at %s\n", intrstr);
924 			return (-1);
925 		}
926 		aprint_normal_dev(sc->vmx_dev, "txrx interrupting at %s\n", intrstr);
927 
928 		kcpuset_create(&affinity, true);
929 		kcpuset_set(affinity, intr_idx % ncpu);
930 		error = interrupt_distribute(*ihs, affinity, NULL);
931 		if (error) {
932 			aprint_normal_dev(sc->vmx_dev,
933 			    "failed to set interrupt affinity for %s, using the default CPU\n",
934 			    intrstr);
935 		}
936 		kcpuset_destroy(affinity);
937 
938 		vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
939 		    vmxnet3_handle_queue, vmxq);
940 		if (vmxq->vxq_si == NULL) {
941 			aprint_error_dev(sc->vmx_dev,
942 			    "softint_establish for vxq_si failed\n");
943 			return (-1);
944 		}
945 
946 		vmxq->vxq_intr_idx = intr_idx;
947 	}
948 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(sc->vmx_dev));
949 	error = workqueue_create(&sc->vmx_queue_wq, xnamebuf,
950 	    vmxnet3_handle_queue_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_NET,
951 	    WQ_PERCPU | WQ_MPSAFE);
952 	if (error) {
953 		aprint_error_dev(sc->vmx_dev, "workqueue_create failed\n");
954 		return (-1);
955 	}
956 	sc->vmx_txrx_workqueue = false;
957 
958 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
959 
960 	snprintf(xnamebuf, 32, "%s: link", device_xname(sc->vmx_dev));
961 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
962 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
963 	    vmxnet3_event_intr, sc, xnamebuf);
964 	if (*ihs == NULL) {
965 		aprint_error_dev(sc->vmx_dev,
966 		    "unable to establish event interrupt at %s\n", intrstr);
967 		return (-1);
968 	}
969 	aprint_normal_dev(sc->vmx_dev, "event interrupting at %s\n", intrstr);
970 
971 	sc->vmx_event_intr_idx = intr_idx;
972 
973 	return (0);
974 }
975 
976 static int
977 vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *sc)
978 {
979 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
980 	pci_intr_handle_t *intr;
981 	void **ihs;
982 	struct vmxnet3_queue *vmxq;
983 	int i;
984 	const char *intrstr;
985 	char intrbuf[PCI_INTRSTR_LEN];
986 	char xnamebuf[32];
987 
988 	intr = &sc->vmx_intrs[0];
989 	ihs = sc->vmx_ihs;
990 	vmxq = &sc->vmx_queue[0];
991 
992 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
993 
994 	snprintf(xnamebuf, 32, "%s: msi", device_xname(sc->vmx_dev));
995 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
996 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
997 	    vmxnet3_legacy_intr, sc, xnamebuf);
998 	if (*ihs == NULL) {
999 		aprint_error_dev(sc->vmx_dev,
1000 		    "unable to establish interrupt at %s\n", intrstr);
1001 		return (-1);
1002 	}
1003 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1004 
1005 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1006 	    vmxnet3_handle_queue, vmxq);
1007 	if (vmxq->vxq_si == NULL) {
1008 		aprint_error_dev(sc->vmx_dev,
1009 		    "softint_establish for vxq_si failed\n");
1010 		return (-1);
1011 	}
1012 
1013 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
1014 		sc->vmx_queue[i].vxq_intr_idx = 0;
1015 	sc->vmx_event_intr_idx = 0;
1016 
1017 	return (0);
1018 }
1019 
1020 static int
1021 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
1022 {
1023 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
1024 	pci_intr_handle_t *intr;
1025 	void **ihs;
1026 	struct vmxnet3_queue *vmxq;
1027 	int i;
1028 	const char *intrstr;
1029 	char intrbuf[PCI_INTRSTR_LEN];
1030 	char xnamebuf[32];
1031 
1032 	intr = &sc->vmx_intrs[0];
1033 	ihs = sc->vmx_ihs;
1034 	vmxq = &sc->vmx_queue[0];
1035 
1036 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1037 
1038 	snprintf(xnamebuf, 32, "%s:legacy", device_xname(sc->vmx_dev));
1039 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1040 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1041 	    vmxnet3_legacy_intr, sc, xnamebuf);
1042 	if (*ihs == NULL) {
1043 		aprint_error_dev(sc->vmx_dev,
1044 		    "unable to establish interrupt at %s\n", intrstr);
1045 		return (-1);
1046 	}
1047 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1048 
1049 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1050 	    vmxnet3_handle_queue, vmxq);
1051 	if (vmxq->vxq_si == NULL) {
1052 		aprint_error_dev(sc->vmx_dev,
1053 		    "softint_establish for vxq_si failed\n");
1054 		return (-1);
1055 	}
1056 
1057 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
1058 		sc->vmx_queue[i].vxq_intr_idx = 0;
1059 	sc->vmx_event_intr_idx = 0;
1060 
1061 	return (0);
1062 }
1063 
1064 static void
1065 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
1066 {
1067 	struct vmxnet3_queue *vmxq;
1068 	struct vmxnet3_txqueue *txq;
1069 	struct vmxnet3_txq_shared *txs;
1070 	struct vmxnet3_rxqueue *rxq;
1071 	struct vmxnet3_rxq_shared *rxs;
1072 	int i;
1073 
1074 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
1075 
1076 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1077 		vmxq = &sc->vmx_queue[i];
1078 		txq = &vmxq->vxq_txqueue;
1079 		txs = txq->vxtxq_ts;
1080 		txs->intr_idx = vmxq->vxq_intr_idx;
1081 	}
1082 
1083 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1084 		vmxq = &sc->vmx_queue[i];
1085 		rxq = &vmxq->vxq_rxqueue;
1086 		rxs = rxq->vxrxq_rs;
1087 		rxs->intr_idx = vmxq->vxq_intr_idx;
1088 	}
1089 }
1090 
1091 static int
1092 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
1093 {
1094 	int error;
1095 
1096 	switch (sc->vmx_intr_type) {
1097 	case VMXNET3_IT_MSIX:
1098 		error = vmxnet3_setup_msix_interrupts(sc);
1099 		break;
1100 	case VMXNET3_IT_MSI:
1101 		error = vmxnet3_setup_msi_interrupt(sc);
1102 		break;
1103 	case VMXNET3_IT_LEGACY:
1104 		error = vmxnet3_setup_legacy_interrupt(sc);
1105 		break;
1106 	default:
1107 		panic("%s: invalid interrupt type %d", __func__,
1108 		    sc->vmx_intr_type);
1109 	}
1110 
1111 	if (error == 0)
1112 		vmxnet3_set_interrupt_idx(sc);
1113 
1114 	return (error);
1115 }
1116 
1117 static int
1118 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1119 {
1120 	struct vmxnet3_rxqueue *rxq;
1121 	struct vmxnet3_rxring *rxr;
1122 	int i;
1123 
1124 	rxq = &sc->vmx_queue[q].vxq_rxqueue;
1125 
1126 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1127 	    device_xname(sc->vmx_dev), q);
1128 	rxq->vxrxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1129 
1130 	rxq->vxrxq_sc = sc;
1131 
1132 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1133 		rxr = &rxq->vxrxq_cmd_ring[i];
1134 		rxr->vxrxr_rid = i;
1135 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1136 		rxr->vxrxr_rxbuf = kmem_zalloc(rxr->vxrxr_ndesc *
1137 		    sizeof(struct vmxnet3_rxbuf), KM_SLEEP);
1138 
1139 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1140 	}
1141 
1142 	return (0);
1143 }
1144 
1145 static int
1146 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1147 {
1148 	struct vmxnet3_txqueue *txq;
1149 	struct vmxnet3_txring *txr;
1150 
1151 	txq = &sc->vmx_queue[q].vxq_txqueue;
1152 	txr = &txq->vxtxq_cmd_ring;
1153 
1154 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1155 	    device_xname(sc->vmx_dev), q);
1156 	txq->vxtxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1157 
1158 	txq->vxtxq_sc = sc;
1159 
1160 	txq->vxtxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1161 	    vmxnet3_deferred_transmit, txq);
1162 	if (txq->vxtxq_si == NULL) {
1163 		mutex_obj_free(txq->vxtxq_mtx);
1164 		aprint_error_dev(sc->vmx_dev,
1165 		    "softint_establish for vxtxq_si failed\n");
1166 		return ENOMEM;
1167 	}
1168 
1169 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1170 	txr->vxtxr_txbuf = kmem_zalloc(txr->vxtxr_ndesc *
1171 	    sizeof(struct vmxnet3_txbuf), KM_SLEEP);
1172 
1173 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1174 
1175 	txq->vxtxq_interq = pcq_create(sc->vmx_ntxdescs, KM_SLEEP);
1176 
1177 	return (0);
1178 }
1179 
1180 static int
1181 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1182 {
1183 	int i, error, max_nqueues;
1184 
1185 	KASSERT(!cpu_intr_p());
1186 	KASSERT(!cpu_softintr_p());
1187 
1188 	/*
1189 	 * Only attempt to create multiple queues if MSIX is available.
1190 	 * This check prevents us from allocating queue structures that
1191 	 * we will not use.
1192 	 *
1193 	 * FreeBSD:
1194 	 * MSIX is disabled by default because it's apparently broken for
1195 	 * devices passed through by at least ESXi 5.1.
1196 	 * The hw.pci.honor_msi_blacklist tunable must be set to zero for MSIX.
1197 	 */
1198 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1199 		sc->vmx_max_nrxqueues = 1;
1200 		sc->vmx_max_ntxqueues = 1;
1201 	}
1202 
1203 	max_nqueues = MAX(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
1204 	sc->vmx_queue = kmem_zalloc(sizeof(struct vmxnet3_queue) * max_nqueues,
1205 	    KM_SLEEP);
1206 
1207 	for (i = 0; i < max_nqueues; i++) {
1208 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
1209 		vmxq->vxq_id = i;
1210 	}
1211 
1212 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1213 		error = vmxnet3_init_rxq(sc, i);
1214 		if (error)
1215 			return (error);
1216 	}
1217 
1218 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1219 		error = vmxnet3_init_txq(sc, i);
1220 		if (error)
1221 			return (error);
1222 	}
1223 
1224 	return (0);
1225 }
1226 
1227 static void
1228 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1229 {
1230 	struct vmxnet3_rxring *rxr;
1231 	int i;
1232 
1233 	rxq->vxrxq_sc = NULL;
1234 
1235 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1236 		rxr = &rxq->vxrxq_cmd_ring[i];
1237 
1238 		if (rxr->vxrxr_rxbuf != NULL) {
1239 			kmem_free(rxr->vxrxr_rxbuf,
1240 			    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf));
1241 			rxr->vxrxr_rxbuf = NULL;
1242 		}
1243 	}
1244 
1245 	if (rxq->vxrxq_mtx != NULL)
1246 		mutex_obj_free(rxq->vxrxq_mtx);
1247 }
1248 
1249 static void
1250 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1251 {
1252 	struct vmxnet3_txring *txr;
1253 	struct mbuf *m;
1254 
1255 	txr = &txq->vxtxq_cmd_ring;
1256 
1257 	txq->vxtxq_sc = NULL;
1258 
1259 	softint_disestablish(txq->vxtxq_si);
1260 
1261 	while ((m = pcq_get(txq->vxtxq_interq)) != NULL)
1262 		m_freem(m);
1263 	pcq_destroy(txq->vxtxq_interq);
1264 
1265 	if (txr->vxtxr_txbuf != NULL) {
1266 		kmem_free(txr->vxtxr_txbuf,
1267 		    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf));
1268 		txr->vxtxr_txbuf = NULL;
1269 	}
1270 
1271 	if (txq->vxtxq_mtx != NULL)
1272 		mutex_obj_free(txq->vxtxq_mtx);
1273 }
1274 
1275 static void
1276 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1277 {
1278 	int i;
1279 
1280 	if (sc->vmx_queue != NULL) {
1281 		int max_nqueues;
1282 
1283 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1284 			vmxnet3_destroy_rxq(&sc->vmx_queue[i].vxq_rxqueue);
1285 
1286 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1287 			vmxnet3_destroy_txq(&sc->vmx_queue[i].vxq_txqueue);
1288 
1289 		max_nqueues = MAX(sc->vmx_max_nrxqueues, sc->vmx_max_ntxqueues);
1290 		kmem_free(sc->vmx_queue,
1291 		    sizeof(struct vmxnet3_queue) * max_nqueues);
1292 	}
1293 }
1294 
1295 static int
1296 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1297 {
1298 	device_t dev;
1299 	uint8_t *kva;
1300 	size_t size;
1301 	int i, error;
1302 
1303 	dev = sc->vmx_dev;
1304 
1305 	size = sizeof(struct vmxnet3_driver_shared);
1306 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1307 	if (error) {
1308 		device_printf(dev, "cannot alloc shared memory\n");
1309 		return (error);
1310 	}
1311 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1312 
1313 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1314 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1315 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1316 	if (error) {
1317 		device_printf(dev, "cannot alloc queue shared memory\n");
1318 		return (error);
1319 	}
1320 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1321 	kva = sc->vmx_qs;
1322 
1323 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1324 		sc->vmx_queue[i].vxq_txqueue.vxtxq_ts =
1325 		    (struct vmxnet3_txq_shared *) kva;
1326 		kva += sizeof(struct vmxnet3_txq_shared);
1327 	}
1328 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1329 		sc->vmx_queue[i].vxq_rxqueue.vxrxq_rs =
1330 		    (struct vmxnet3_rxq_shared *) kva;
1331 		kva += sizeof(struct vmxnet3_rxq_shared);
1332 	}
1333 
1334 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1335 		size = sizeof(struct vmxnet3_rss_shared);
1336 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1337 		if (error) {
1338 			device_printf(dev, "cannot alloc rss shared memory\n");
1339 			return (error);
1340 		}
1341 		sc->vmx_rss =
1342 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1343 	}
1344 
1345 	return (0);
1346 }
1347 
1348 static void
1349 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1350 {
1351 
1352 	if (sc->vmx_rss != NULL) {
1353 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1354 		sc->vmx_rss = NULL;
1355 	}
1356 
1357 	if (sc->vmx_qs != NULL) {
1358 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1359 		sc->vmx_qs = NULL;
1360 	}
1361 
1362 	if (sc->vmx_ds != NULL) {
1363 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1364 		sc->vmx_ds = NULL;
1365 	}
1366 }
1367 
1368 static int
1369 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1370 {
1371 	device_t dev;
1372 	struct vmxnet3_txqueue *txq;
1373 	struct vmxnet3_txring *txr;
1374 	struct vmxnet3_comp_ring *txc;
1375 	size_t descsz, compsz;
1376 	u_int i;
1377 	int q, error;
1378 
1379 	dev = sc->vmx_dev;
1380 
1381 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1382 		txq = &sc->vmx_queue[q].vxq_txqueue;
1383 		txr = &txq->vxtxq_cmd_ring;
1384 		txc = &txq->vxtxq_comp_ring;
1385 
1386 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1387 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1388 
1389 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1390 		if (error) {
1391 			device_printf(dev, "cannot alloc Tx descriptors for "
1392 			    "queue %d error %d\n", q, error);
1393 			return (error);
1394 		}
1395 		txr->vxtxr_txd =
1396 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1397 
1398 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1399 		if (error) {
1400 			device_printf(dev, "cannot alloc Tx comp descriptors "
1401 			   "for queue %d error %d\n", q, error);
1402 			return (error);
1403 		}
1404 		txc->vxcr_u.txcd =
1405 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1406 
1407 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1408 			error = bus_dmamap_create(sc->vmx_dmat, VMXNET3_TX_MAXSIZE,
1409 			    VMXNET3_TX_MAXSEGS, VMXNET3_TX_MAXSEGSIZE, 0, BUS_DMA_NOWAIT,
1410 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1411 			if (error) {
1412 				device_printf(dev, "unable to create Tx buf "
1413 				    "dmamap for queue %d idx %d\n", q, i);
1414 				return (error);
1415 			}
1416 		}
1417 	}
1418 
1419 	return (0);
1420 }
1421 
1422 static void
1423 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1424 {
1425 	struct vmxnet3_txqueue *txq;
1426 	struct vmxnet3_txring *txr;
1427 	struct vmxnet3_comp_ring *txc;
1428 	struct vmxnet3_txbuf *txb;
1429 	u_int i;
1430 	int q;
1431 
1432 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1433 		txq = &sc->vmx_queue[q].vxq_txqueue;
1434 		txr = &txq->vxtxq_cmd_ring;
1435 		txc = &txq->vxtxq_comp_ring;
1436 
1437 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1438 			txb = &txr->vxtxr_txbuf[i];
1439 			if (txb->vtxb_dmamap != NULL) {
1440 				bus_dmamap_destroy(sc->vmx_dmat,
1441 				    txb->vtxb_dmamap);
1442 				txb->vtxb_dmamap = NULL;
1443 			}
1444 		}
1445 
1446 		if (txc->vxcr_u.txcd != NULL) {
1447 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1448 			txc->vxcr_u.txcd = NULL;
1449 		}
1450 
1451 		if (txr->vxtxr_txd != NULL) {
1452 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1453 			txr->vxtxr_txd = NULL;
1454 		}
1455 	}
1456 }
1457 
1458 static int
1459 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1460 {
1461 	device_t dev;
1462 	struct vmxnet3_rxqueue *rxq;
1463 	struct vmxnet3_rxring *rxr;
1464 	struct vmxnet3_comp_ring *rxc;
1465 	int descsz, compsz;
1466 	u_int i, j;
1467 	int q, error;
1468 
1469 	dev = sc->vmx_dev;
1470 
1471 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1472 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1473 		rxc = &rxq->vxrxq_comp_ring;
1474 		compsz = 0;
1475 
1476 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1477 			rxr = &rxq->vxrxq_cmd_ring[i];
1478 
1479 			descsz = rxr->vxrxr_ndesc *
1480 			    sizeof(struct vmxnet3_rxdesc);
1481 			compsz += rxr->vxrxr_ndesc *
1482 			    sizeof(struct vmxnet3_rxcompdesc);
1483 
1484 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1485 			    &rxr->vxrxr_dma);
1486 			if (error) {
1487 				device_printf(dev, "cannot allocate Rx "
1488 				    "descriptors for queue %d/%d error %d\n",
1489 				    i, q, error);
1490 				return (error);
1491 			}
1492 			rxr->vxrxr_rxd =
1493 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1494 		}
1495 
1496 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1497 		if (error) {
1498 			device_printf(dev, "cannot alloc Rx comp descriptors "
1499 			    "for queue %d error %d\n", q, error);
1500 			return (error);
1501 		}
1502 		rxc->vxcr_u.rxcd =
1503 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1504 
1505 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1506 			rxr = &rxq->vxrxq_cmd_ring[i];
1507 
1508 			error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1509 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1510 			    &rxr->vxrxr_spare_dmap);
1511 			if (error) {
1512 				device_printf(dev, "unable to create spare "
1513 				    "dmamap for queue %d/%d error %d\n",
1514 				    q, i, error);
1515 				return (error);
1516 			}
1517 
1518 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1519 				error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1520 				    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1521 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1522 				if (error) {
1523 					device_printf(dev, "unable to create "
1524 					    "dmamap for queue %d/%d slot %d "
1525 					    "error %d\n",
1526 					    q, i, j, error);
1527 					return (error);
1528 				}
1529 			}
1530 		}
1531 	}
1532 
1533 	return (0);
1534 }
1535 
1536 static void
1537 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1538 {
1539 	struct vmxnet3_rxqueue *rxq;
1540 	struct vmxnet3_rxring *rxr;
1541 	struct vmxnet3_comp_ring *rxc;
1542 	struct vmxnet3_rxbuf *rxb;
1543 	u_int i, j;
1544 	int q;
1545 
1546 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1547 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1548 		rxc = &rxq->vxrxq_comp_ring;
1549 
1550 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1551 			rxr = &rxq->vxrxq_cmd_ring[i];
1552 
1553 			if (rxr->vxrxr_spare_dmap != NULL) {
1554 				bus_dmamap_destroy(sc->vmx_dmat,
1555 				    rxr->vxrxr_spare_dmap);
1556 				rxr->vxrxr_spare_dmap = NULL;
1557 			}
1558 
1559 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1560 				rxb = &rxr->vxrxr_rxbuf[j];
1561 				if (rxb->vrxb_dmamap != NULL) {
1562 					bus_dmamap_destroy(sc->vmx_dmat,
1563 					    rxb->vrxb_dmamap);
1564 					rxb->vrxb_dmamap = NULL;
1565 				}
1566 			}
1567 		}
1568 
1569 		if (rxc->vxcr_u.rxcd != NULL) {
1570 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1571 			rxc->vxcr_u.rxcd = NULL;
1572 		}
1573 
1574 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1575 			rxr = &rxq->vxrxq_cmd_ring[i];
1576 
1577 			if (rxr->vxrxr_rxd != NULL) {
1578 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1579 				rxr->vxrxr_rxd = NULL;
1580 			}
1581 		}
1582 	}
1583 }
1584 
1585 static int
1586 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1587 {
1588 	int error;
1589 
1590 	error = vmxnet3_alloc_txq_data(sc);
1591 	if (error)
1592 		return (error);
1593 
1594 	error = vmxnet3_alloc_rxq_data(sc);
1595 	if (error)
1596 		return (error);
1597 
1598 	return (0);
1599 }
1600 
1601 static void
1602 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1603 {
1604 
1605 	if (sc->vmx_queue != NULL) {
1606 		vmxnet3_free_rxq_data(sc);
1607 		vmxnet3_free_txq_data(sc);
1608 	}
1609 }
1610 
1611 static int
1612 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1613 {
1614 	int error;
1615 
1616 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1617 	    32, &sc->vmx_mcast_dma);
1618 	if (error)
1619 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1620 	else
1621 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1622 
1623 	return (error);
1624 }
1625 
1626 static void
1627 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1628 {
1629 
1630 	if (sc->vmx_mcast != NULL) {
1631 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1632 		sc->vmx_mcast = NULL;
1633 	}
1634 }
1635 
1636 static void
1637 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1638 {
1639 	struct vmxnet3_driver_shared *ds;
1640 	struct vmxnet3_txqueue *txq;
1641 	struct vmxnet3_txq_shared *txs;
1642 	struct vmxnet3_rxqueue *rxq;
1643 	struct vmxnet3_rxq_shared *rxs;
1644 	int i;
1645 
1646 	ds = sc->vmx_ds;
1647 
1648 	/*
1649 	 * Initialize fields of the shared data that remain the same across
1650 	 * reinits. Note the shared data is zero'd when allocated.
1651 	 */
1652 
1653 	ds->magic = VMXNET3_REV1_MAGIC;
1654 
1655 	/* DriverInfo */
1656 	ds->version = VMXNET3_DRIVER_VERSION;
1657 	ds->guest = VMXNET3_GOS_FREEBSD |
1658 #ifdef __LP64__
1659 	    VMXNET3_GOS_64BIT;
1660 #else
1661 	    VMXNET3_GOS_32BIT;
1662 #endif
1663 	ds->vmxnet3_revision = 1;
1664 	ds->upt_version = 1;
1665 
1666 	/* Misc. conf */
1667 	ds->driver_data = vtophys(sc);
1668 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1669 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1670 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1671 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1672 
1673 	/* RSS conf */
1674 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1675 		ds->rss.version = 1;
1676 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1677 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1678 	}
1679 
1680 	/* Interrupt control. */
1681 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1682 	ds->nintr = sc->vmx_nintrs;
1683 	ds->evintr = sc->vmx_event_intr_idx;
1684 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1685 
1686 	for (i = 0; i < sc->vmx_nintrs; i++)
1687 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1688 
1689 	/* Receive filter. */
1690 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1691 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1692 
1693 	/* Tx queues */
1694 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1695 		txq = &sc->vmx_queue[i].vxq_txqueue;
1696 		txs = txq->vxtxq_ts;
1697 
1698 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1699 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1700 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1701 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1702 		txs->driver_data = vtophys(txq);
1703 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1704 	}
1705 
1706 	/* Rx queues */
1707 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1708 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
1709 		rxs = rxq->vxrxq_rs;
1710 
1711 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1712 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1713 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1714 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1715 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1716 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1717 		rxs->driver_data = vtophys(rxq);
1718 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1719 	}
1720 }
1721 
1722 static void
1723 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1724 {
1725 	/*
1726 	 * Use the same key as the Linux driver until FreeBSD can do
1727 	 * RSS (presumably Toeplitz) in software.
1728 	 */
1729 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1730 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1731 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1732 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1733 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1734 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1735 	};
1736 
1737 	struct vmxnet3_rss_shared *rss;
1738 	int i;
1739 
1740 	rss = sc->vmx_rss;
1741 
1742 	rss->hash_type =
1743 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1744 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1745 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1746 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1747 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1748 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1749 
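	/* Spread the indirection table entries round-robin across the Rx queues. */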
1750 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1751 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1752 }
1753 
1754 static void
1755 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1756 {
1757 	struct ifnet *ifp;
1758 	struct vmxnet3_driver_shared *ds;
1759 
1760 	ifp = &sc->vmx_ethercom.ec_if;
1761 	ds = sc->vmx_ds;
1762 
1763 	ds->mtu = ifp->if_mtu;
1764 	ds->ntxqueue = sc->vmx_ntxqueues;
1765 	ds->nrxqueue = sc->vmx_nrxqueues;
1766 
1767 	ds->upt_features = 0;
1768 	if (ifp->if_capenable &
1769 	    (IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
1770 	    IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
1771 		ds->upt_features |= UPT1_F_CSUM;
1772 	if (sc->vmx_ethercom.ec_capenable & ETHERCAP_VLAN_HWTAGGING)
1773 		ds->upt_features |= UPT1_F_VLAN;
1774 
1775 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1776 		ds->upt_features |= UPT1_F_RSS;
1777 		vmxnet3_reinit_rss_shared_data(sc);
1778 	}
1779 
1780 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1781 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1782 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1783 }
1784 
1785 static int
1786 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1787 {
1788 	int error;
1789 
1790 	error = vmxnet3_alloc_shared_data(sc);
1791 	if (error)
1792 		return (error);
1793 
1794 	error = vmxnet3_alloc_queue_data(sc);
1795 	if (error)
1796 		return (error);
1797 
1798 	error = vmxnet3_alloc_mcast_table(sc);
1799 	if (error)
1800 		return (error);
1801 
1802 	vmxnet3_init_shared_data(sc);
1803 
1804 	return (0);
1805 }
1806 
1807 static void
1808 vmxnet3_free_data(struct vmxnet3_softc *sc)
1809 {
1810 
1811 	vmxnet3_free_mcast_table(sc);
1812 	vmxnet3_free_queue_data(sc);
1813 	vmxnet3_free_shared_data(sc);
1814 }
1815 
1816 static int
1817 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1818 {
1819 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
1820 
1821 	vmxnet3_get_lladdr(sc);
1822 	aprint_normal_dev(sc->vmx_dev, "Ethernet address %s\n",
1823 	    ether_sprintf(sc->vmx_lladdr));
1824 	vmxnet3_set_lladdr(sc);
1825 
1826 	strlcpy(ifp->if_xname, device_xname(sc->vmx_dev), IFNAMSIZ);
1827 	ifp->if_softc = sc;
1828 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
1829 	ifp->if_extflags = IFEF_MPSAFE;
1830 	ifp->if_ioctl = vmxnet3_ioctl;
1831 	ifp->if_start = vmxnet3_start;
1832 	ifp->if_transmit = vmxnet3_transmit;
1833 	ifp->if_watchdog = NULL;
1834 	ifp->if_init = vmxnet3_init;
1835 	ifp->if_stop = vmxnet3_stop;
1836 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_CSUM_IPv4_Rx |
1837 		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1838 		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1839 		    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1840 		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1841 
1842 	ifp->if_capenable = ifp->if_capabilities;
1843 
1844 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1845 
1846 	sc->vmx_ethercom.ec_capabilities |=
1847 	    ETHERCAP_VLAN_MTU | ETHERCAP_VLAN_HWTAGGING | ETHERCAP_JUMBO_MTU;
1848 	sc->vmx_ethercom.ec_capenable |= ETHERCAP_VLAN_HWTAGGING;
1849 
1850 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs);
1851 	IFQ_SET_READY(&ifp->if_snd);
1852 
1853 	/* Initialize ifmedia structures. */
1854 	sc->vmx_ethercom.ec_ifmedia = &sc->vmx_media;
1855 	ifmedia_init_with_lock(&sc->vmx_media, IFM_IMASK, vmxnet3_ifmedia_change,
1856 	    vmxnet3_ifmedia_status, sc->vmx_mtx);
1857 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1858 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
1859 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T, 0, NULL);
1860 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1861 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T, 0, NULL);
1862 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1863 
1864 	if_attach(ifp);
1865 	if_deferred_start_init(ifp, NULL);
1866 	ether_ifattach(ifp, sc->vmx_lladdr);
1867 	ether_set_ifflags_cb(&sc->vmx_ethercom, vmxnet3_ifflags_cb);
1868 	vmxnet3_cmd_link_status(ifp);
1869 
1870 	/* These limits must be set before interrupts are set up. */
1871 	sc->vmx_rx_intr_process_limit = VMXNET3_RX_INTR_PROCESS_LIMIT;
1872 	sc->vmx_rx_process_limit = VMXNET3_RX_PROCESS_LIMIT;
1873 	sc->vmx_tx_intr_process_limit = VMXNET3_TX_INTR_PROCESS_LIMIT;
1874 	sc->vmx_tx_process_limit = VMXNET3_TX_PROCESS_LIMIT;
1875 
1876 	return (0);
1877 }
1878 
1879 static int
1880 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
1881 {
1882 	const char *devname;
1883 	struct sysctllog **log;
1884 	const struct sysctlnode *rnode, *rxnode, *txnode;
1885 	int error;
1886 
1887 	log = &sc->vmx_sysctllog;
1888 	devname = device_xname(sc->vmx_dev);
1889 
1890 	error = sysctl_createv(log, 0, NULL, &rnode,
1891 	    0, CTLTYPE_NODE, devname,
1892 	    SYSCTL_DESCR("vmxnet3 information and settings"),
1893 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
1894 	if (error)
1895 		goto out;
1896 	error = sysctl_createv(log, 0, &rnode, NULL,
1897 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
1898 	    SYSCTL_DESCR("Use workqueue for packet processing"),
1899 	    NULL, 0, &sc->vmx_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
1900 	if (error)
1901 		goto out;
1902 
1903 	error = sysctl_createv(log, 0, &rnode, &rxnode,
1904 	    0, CTLTYPE_NODE, "rx",
1905 	    SYSCTL_DESCR("vmxnet3 information and settings for Rx"),
1906 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1907 	if (error)
1908 		goto out;
1909 	error = sysctl_createv(log, 0, &rxnode, NULL,
1910 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1911 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
1912 	    NULL, 0, &sc->vmx_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1913 	if (error)
1914 		goto out;
1915 	error = sysctl_createv(log, 0, &rxnode, NULL,
1916 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1917 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1918 	    NULL, 0, &sc->vmx_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1919 	if (error)
1920 		goto out;
1921 
1922 	error = sysctl_createv(log, 0, &rnode, &txnode,
1923 	    0, CTLTYPE_NODE, "tx",
1924 	    SYSCTL_DESCR("vmxnet3 information and settings for Tx"),
1925 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1926 	if (error)
1927 		goto out;
1928 	error = sysctl_createv(log, 0, &txnode, NULL,
1929 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1930 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1931 	    NULL, 0, &sc->vmx_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1932 	if (error)
1933 		goto out;
1934 	error = sysctl_createv(log, 0, &txnode, NULL,
1935 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1936 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1937 	    NULL, 0, &sc->vmx_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1938 
1939 out:
1940 	if (error) {
1941 		aprint_error_dev(sc->vmx_dev,
1942 		    "unable to create sysctl node\n");
1943 		sysctl_teardown(log);
1944 	}
1945 	return error;
1946 }
1947 
1948 static int
1949 vmxnet3_setup_stats(struct vmxnet3_softc *sc)
1950 {
1951 	struct vmxnet3_queue *vmxq;
1952 	struct vmxnet3_txqueue *txq;
1953 	struct vmxnet3_rxqueue *rxq;
1954 	int i;
1955 
1956 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1957 		vmxq = &sc->vmx_queue[i];
1958 		txq = &vmxq->vxq_txqueue;
1959 		evcnt_attach_dynamic(&txq->vxtxq_intr, EVCNT_TYPE_INTR,
1960 		    NULL, txq->vxtxq_name, "Interrupt on queue");
1961 		evcnt_attach_dynamic(&txq->vxtxq_defer, EVCNT_TYPE_MISC,
1962 		    NULL, txq->vxtxq_name, "Handled queue in softint/workqueue");
1963 		evcnt_attach_dynamic(&txq->vxtxq_deferreq, EVCNT_TYPE_MISC,
1964 		    NULL, txq->vxtxq_name, "Requested in softint/workqueue");
1965 		evcnt_attach_dynamic(&txq->vxtxq_pcqdrop, EVCNT_TYPE_MISC,
1966 		    NULL, txq->vxtxq_name, "Dropped in pcq");
1967 		evcnt_attach_dynamic(&txq->vxtxq_transmitdef, EVCNT_TYPE_MISC,
1968 		    NULL, txq->vxtxq_name, "Deferred transmit");
1969 		evcnt_attach_dynamic(&txq->vxtxq_watchdogto, EVCNT_TYPE_MISC,
1970 		    NULL, txq->vxtxq_name, "Watchdog timeout");
1971 		evcnt_attach_dynamic(&txq->vxtxq_defragged, EVCNT_TYPE_MISC,
1972 		    NULL, txq->vxtxq_name, "m_defrag succeeded");
1973 		evcnt_attach_dynamic(&txq->vxtxq_defrag_failed, EVCNT_TYPE_MISC,
1974 		    NULL, txq->vxtxq_name, "m_defrag failed");
1975 	}
1976 
1977 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1978 		vmxq = &sc->vmx_queue[i];
1979 		rxq = &vmxq->vxq_rxqueue;
1980 		evcnt_attach_dynamic(&rxq->vxrxq_intr, EVCNT_TYPE_INTR,
1981 		    NULL, rxq->vxrxq_name, "Interrupt on queue");
1982 		evcnt_attach_dynamic(&rxq->vxrxq_defer, EVCNT_TYPE_MISC,
1983 		    NULL, rxq->vxrxq_name, "Handled queue in softint/workqueue");
1984 		evcnt_attach_dynamic(&rxq->vxrxq_deferreq, EVCNT_TYPE_MISC,
1985 		    NULL, rxq->vxrxq_name, "Requested in softint/workqueue");
1986 		evcnt_attach_dynamic(&rxq->vxrxq_mgetcl_failed, EVCNT_TYPE_MISC,
1987 		    NULL, rxq->vxrxq_name, "MCLGET failed");
1988 		evcnt_attach_dynamic(&rxq->vxrxq_mbuf_load_failed, EVCNT_TYPE_MISC,
1989 		    NULL, rxq->vxrxq_name, "bus_dmamap_load_mbuf failed");
1990 	}
1991 
1992 	evcnt_attach_dynamic(&sc->vmx_event_intr, EVCNT_TYPE_INTR,
1993 	    NULL, device_xname(sc->vmx_dev), "Interrupt for other events");
1994 	evcnt_attach_dynamic(&sc->vmx_event_link, EVCNT_TYPE_MISC,
1995 	    NULL, device_xname(sc->vmx_dev), "Link status event");
1996 	evcnt_attach_dynamic(&sc->vmx_event_txqerror, EVCNT_TYPE_MISC,
1997 	    NULL, device_xname(sc->vmx_dev), "Tx queue error event");
1998 	evcnt_attach_dynamic(&sc->vmx_event_rxqerror, EVCNT_TYPE_MISC,
1999 	    NULL, device_xname(sc->vmx_dev), "Rx queue error event");
2000 	evcnt_attach_dynamic(&sc->vmx_event_dic, EVCNT_TYPE_MISC,
2001 	    NULL, device_xname(sc->vmx_dev), "Device impl change event");
2002 	evcnt_attach_dynamic(&sc->vmx_event_debug, EVCNT_TYPE_MISC,
2003 	    NULL, device_xname(sc->vmx_dev), "Debug event");
2004 
2005 	return 0;
2006 }
2007 
2008 static void
2009 vmxnet3_teardown_stats(struct vmxnet3_softc *sc)
2010 {
2011 	struct vmxnet3_queue *vmxq;
2012 	struct vmxnet3_txqueue *txq;
2013 	struct vmxnet3_rxqueue *rxq;
2014 	int i;
2015 
2016 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2017 		vmxq = &sc->vmx_queue[i];
2018 		txq = &vmxq->vxq_txqueue;
2019 		evcnt_detach(&txq->vxtxq_intr);
2020 		evcnt_detach(&txq->vxtxq_defer);
2021 		evcnt_detach(&txq->vxtxq_deferreq);
2022 		evcnt_detach(&txq->vxtxq_pcqdrop);
2023 		evcnt_detach(&txq->vxtxq_transmitdef);
2024 		evcnt_detach(&txq->vxtxq_watchdogto);
2025 		evcnt_detach(&txq->vxtxq_defragged);
2026 		evcnt_detach(&txq->vxtxq_defrag_failed);
2027 	}
2028 
2029 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2030 		vmxq = &sc->vmx_queue[i];
2031 		rxq = &vmxq->vxq_rxqueue;
2032 		evcnt_detach(&rxq->vxrxq_intr);
2033 		evcnt_detach(&rxq->vxrxq_defer);
2034 		evcnt_detach(&rxq->vxrxq_deferreq);
2035 		evcnt_detach(&rxq->vxrxq_mgetcl_failed);
2036 		evcnt_detach(&rxq->vxrxq_mbuf_load_failed);
2037 	}
2038 
2039 	evcnt_detach(&sc->vmx_event_intr);
2040 	evcnt_detach(&sc->vmx_event_link);
2041 	evcnt_detach(&sc->vmx_event_txqerror);
2042 	evcnt_detach(&sc->vmx_event_rxqerror);
2043 	evcnt_detach(&sc->vmx_event_dic);
2044 	evcnt_detach(&sc->vmx_event_debug);
2045 }
2046 
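/*
 * Handle device events posted in the shared data area: acknowledge
 * them, update the link state, log Tx/Rx queue errors, and reinitialize
 * the device if a queue error was reported.
 */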
2047 static void
2048 vmxnet3_evintr(struct vmxnet3_softc *sc)
2049 {
2050 	device_t dev;
2051 	struct vmxnet3_txq_shared *ts;
2052 	struct vmxnet3_rxq_shared *rs;
2053 	uint32_t event;
2054 	int reset;
2055 
2056 	dev = sc->vmx_dev;
2057 	reset = 0;
2058 
2059 	VMXNET3_CORE_LOCK(sc);
2060 
2061 	/* Clear events. */
2062 	event = sc->vmx_ds->event;
2063 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
2064 
2065 	if (event & VMXNET3_EVENT_LINK) {
2066 		sc->vmx_event_link.ev_count++;
2067 		vmxnet3_if_link_status(sc);
2068 		if (sc->vmx_link_active != 0)
2069 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2070 	}
2071 
2072 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
2073 		if (event & VMXNET3_EVENT_TQERROR)
2074 			sc->vmx_event_txqerror.ev_count++;
2075 		if (event & VMXNET3_EVENT_RQERROR)
2076 			sc->vmx_event_rxqerror.ev_count++;
2077 
2078 		reset = 1;
2079 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
2080 		ts = sc->vmx_queue[0].vxq_txqueue.vxtxq_ts;
2081 		if (ts->stopped != 0)
2082 			device_printf(dev, "Tx queue error %#x\n", ts->error);
2083 		rs = sc->vmx_queue[0].vxq_rxqueue.vxrxq_rs;
2084 		if (rs->stopped != 0)
2085 			device_printf(dev, "Rx queue error %#x\n", rs->error);
2086 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
2087 	}
2088 
2089 	if (event & VMXNET3_EVENT_DIC) {
2090 		sc->vmx_event_dic.ev_count++;
2091 		device_printf(dev, "device implementation change event\n");
2092 	}
2093 	if (event & VMXNET3_EVENT_DEBUG) {
2094 		sc->vmx_event_debug.ev_count++;
2095 		device_printf(dev, "debug event\n");
2096 	}
2097 
2098 	if (reset != 0)
2099 		vmxnet3_init_locked(sc);
2100 
2101 	VMXNET3_CORE_UNLOCK(sc);
2102 }
2103 
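/*
 * Reclaim completed Tx descriptors: walk the Tx completion ring, unload
 * and free the transmitted mbufs, and update the interface statistics.
 * Returns true if the limit was reached while more completions remain.
 */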
2104 static bool
2105 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq, u_int limit)
2106 {
2107 	struct vmxnet3_softc *sc;
2108 	struct vmxnet3_txring *txr;
2109 	struct vmxnet3_comp_ring *txc;
2110 	struct vmxnet3_txcompdesc *txcd;
2111 	struct vmxnet3_txbuf *txb;
2112 	struct ifnet *ifp;
2113 	struct mbuf *m;
2114 	u_int sop;
2115 	bool more = false;
2116 
2117 	sc = txq->vxtxq_sc;
2118 	txr = &txq->vxtxq_cmd_ring;
2119 	txc = &txq->vxtxq_comp_ring;
2120 	ifp = &sc->vmx_ethercom.ec_if;
2121 
2122 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2123 
2124 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2125 	for (;;) {
2126 		if (limit-- == 0) {
2127 			more = true;
2128 			break;
2129 		}
2130 
2131 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
2132 		if (txcd->gen != txc->vxcr_gen)
2133 			break;
2134 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2135 
2136 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
2137 			txc->vxcr_next = 0;
2138 			txc->vxcr_gen ^= 1;
2139 		}
2140 
2141 		sop = txr->vxtxr_next;
2142 		txb = &txr->vxtxr_txbuf[sop];
2143 
2144 		if ((m = txb->vtxb_m) != NULL) {
2145 			bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2146 			    0, txb->vtxb_dmamap->dm_mapsize,
2147 			    BUS_DMASYNC_POSTWRITE);
2148 			bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2149 
2150 			if_statinc_ref(nsr, if_opackets);
2151 			if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
2152 			if (m->m_flags & M_MCAST)
2153 				if_statinc_ref(nsr, if_omcasts);
2154 
2155 			m_freem(m);
2156 			txb->vtxb_m = NULL;
2157 		}
2158 
2159 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
2160 	}
2161 	IF_STAT_PUTREF(ifp);
2162 
2163 	if (txr->vxtxr_head == txr->vxtxr_next)
2164 		txq->vxtxq_watchdog = 0;
2165 
2166 	return more;
2167 }
2168 
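/*
 * Attach a fresh mbuf cluster to the Rx descriptor at the current fill
 * index: load it into the spare DMA map, swap that map with the slot's
 * old one, and hand the descriptor back to the device.  Only ring 0
 * (head buffers) is populated for now.
 */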
2169 static int
2170 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
2171     struct vmxnet3_rxring *rxr)
2172 {
2173 	struct mbuf *m;
2174 	struct vmxnet3_rxdesc *rxd;
2175 	struct vmxnet3_rxbuf *rxb;
2176 	bus_dma_tag_t tag;
2177 	bus_dmamap_t dmap;
2178 	int idx, btype, error;
2179 
2180 	tag = sc->vmx_dmat;
2181 	dmap = rxr->vxrxr_spare_dmap;
2182 	idx = rxr->vxrxr_fill;
2183 	rxd = &rxr->vxrxr_rxd[idx];
2184 	rxb = &rxr->vxrxr_rxbuf[idx];
2185 
2186 	/* Don't allocate buffers for ring 2 for now. */
2187 	if (rxr->vxrxr_rid != 0)
2188 		return -1;
2189 	btype = VMXNET3_BTYPE_HEAD;
2190 
2191 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2192 	if (m == NULL)
2193 		return (ENOBUFS);
2194 
2195 	MCLGET(m, M_DONTWAIT);
2196 	if ((m->m_flags & M_EXT) == 0) {
2197 		rxq->vxrxq_mgetcl_failed.ev_count++;
2198 		m_freem(m);
2199 		return (ENOBUFS);
2200 	}
2201 
2202 	m->m_pkthdr.len = m->m_len = JUMBO_LEN;
2203 	m_adj(m, ETHER_ALIGN);
2204 
2205 	error = bus_dmamap_load_mbuf(sc->vmx_dmat, dmap, m, BUS_DMA_NOWAIT);
2206 	if (error) {
2207 		m_freem(m);
2208 		rxq->vxrxq_mbuf_load_failed.ev_count++;
2209 		return (error);
2210 	}
2211 
2212 	if (rxb->vrxb_m != NULL) {
2213 		bus_dmamap_sync(tag, rxb->vrxb_dmamap,
2214 		    0, rxb->vrxb_dmamap->dm_mapsize,
2215 		    BUS_DMASYNC_POSTREAD);
2216 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2217 	}
2218 
2219 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2220 	rxb->vrxb_dmamap = dmap;
2221 	rxb->vrxb_m = m;
2222 
2223 	rxd->addr = DMAADDR(dmap);
2224 	rxd->len = m->m_pkthdr.len;
2225 	rxd->btype = btype;
2226 	rxd->gen = rxr->vxrxr_gen;
2227 
2228 	vmxnet3_rxr_increment_fill(rxr);
2229 	return (0);
2230 }
2231 
2232 static void
2233 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2234     struct vmxnet3_rxring *rxr, int idx)
2235 {
2236 	struct vmxnet3_rxdesc *rxd;
2237 
2238 	rxd = &rxr->vxrxr_rxd[idx];
2239 	rxd->gen = rxr->vxrxr_gen;
2240 	vmxnet3_rxr_increment_fill(rxr);
2241 }
2242 
2243 static void
2244 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2245 {
2246 	struct vmxnet3_softc *sc;
2247 	struct vmxnet3_rxring *rxr;
2248 	struct vmxnet3_comp_ring *rxc;
2249 	struct vmxnet3_rxcompdesc *rxcd;
2250 	int idx, eof;
2251 
2252 	sc = rxq->vxrxq_sc;
2253 	rxc = &rxq->vxrxq_comp_ring;
2254 
2255 	do {
2256 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2257 		if (rxcd->gen != rxc->vxcr_gen)
2258 			break;		/* Not expected. */
2259 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2260 
2261 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2262 			rxc->vxcr_next = 0;
2263 			rxc->vxcr_gen ^= 1;
2264 		}
2265 
2266 		idx = rxcd->rxd_idx;
2267 		eof = rxcd->eop;
2268 		if (rxcd->qid < sc->vmx_nrxqueues)
2269 			rxr = &rxq->vxrxq_cmd_ring[0];
2270 		else
2271 			rxr = &rxq->vxrxq_cmd_ring[1];
2272 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2273 	} while (!eof);
2274 }
2275 
2276 static void
2277 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2278 {
2279 	if (rxcd->no_csum)
2280 		return;
2281 
2282 	if (rxcd->ipv4) {
2283 		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
2284 		if (rxcd->ipcsum_ok == 0)
2285 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
2286 	}
2287 
2288 	if (rxcd->fragment)
2289 		return;
2290 
2291 	if (rxcd->tcp) {
2292 		m->m_pkthdr.csum_flags |=
2293 		    rxcd->ipv4 ? M_CSUM_TCPv4 : M_CSUM_TCPv6;
2294 		if ((rxcd->csum_ok) == 0)
2295 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2296 	}
2297 
2298 	if (rxcd->udp) {
2299 		m->m_pkthdr.csum_flags |=
2300 		    rxcd->ipv4 ? M_CSUM_UDPv4 : M_CSUM_UDPv6;
2301 		if ((rxcd->csum_ok) == 0)
2302 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2303 	}
2304 }
2305 
2306 static void
2307 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2308     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2309 {
2310 	struct vmxnet3_softc *sc;
2311 	struct ifnet *ifp;
2312 
2313 	sc = rxq->vxrxq_sc;
2314 	ifp = &sc->vmx_ethercom.ec_if;
2315 
2316 	if (rxcd->error) {
2317 		if_statinc(ifp, if_ierrors);
2318 		m_freem(m);
2319 		return;
2320 	}
2321 
2322 	if (!rxcd->no_csum)
2323 		vmxnet3_rx_csum(rxcd, m);
2324 	if (rxcd->vlan)
2325 		vlan_set_tag(m, rxcd->vtag);
2326 
2327 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2328 	if_statinc_ref(nsr, if_ipackets);
2329 	if_statadd_ref(nsr, if_ibytes, m->m_pkthdr.len);
2330 	IF_STAT_PUTREF(ifp);
2331 
2332 	if_percpuq_enqueue(ifp->if_percpuq, m);
2333 }
2334 
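/*
 * Process up to "limit" entries of the Rx completion ring: refill each
 * consumed descriptor via vmxnet3_newbuf(), chain multi-descriptor
 * frames from SOP to EOP, and pass completed frames to
 * vmxnet3_rxq_input().  Returns true if the limit was reached while
 * more completions remain.
 */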
2335 static bool
2336 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
2337 {
2338 	struct vmxnet3_softc *sc;
2339 	struct ifnet *ifp;
2340 	struct vmxnet3_rxring *rxr;
2341 	struct vmxnet3_comp_ring *rxc;
2342 	struct vmxnet3_rxdesc *rxd __diagused;
2343 	struct vmxnet3_rxcompdesc *rxcd;
2344 	struct mbuf *m, *m_head, *m_tail;
2345 	u_int idx, length;
2346 	bool more = false;
2347 
2348 	sc = rxq->vxrxq_sc;
2349 	ifp = &sc->vmx_ethercom.ec_if;
2350 	rxc = &rxq->vxrxq_comp_ring;
2351 
2352 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2353 
2354 	if ((ifp->if_flags & IFF_RUNNING) == 0)
2355 		return more;
2356 
2357 	m_head = rxq->vxrxq_mhead;
2358 	rxq->vxrxq_mhead = NULL;
2359 	m_tail = rxq->vxrxq_mtail;
2360 	rxq->vxrxq_mtail = NULL;
2361 	KASSERT(m_head == NULL || m_tail != NULL);
2362 
2363 	for (;;) {
2364 		if (limit-- == 0) {
2365 			more = true;
2366 			break;
2367 		}
2368 
2369 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2370 		if (rxcd->gen != rxc->vxcr_gen) {
2371 			rxq->vxrxq_mhead = m_head;
2372 			rxq->vxrxq_mtail = m_tail;
2373 			break;
2374 		}
2375 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2376 
2377 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2378 			rxc->vxcr_next = 0;
2379 			rxc->vxcr_gen ^= 1;
2380 		}
2381 
2382 		idx = rxcd->rxd_idx;
2383 		length = rxcd->len;
2384 		if (rxcd->qid < sc->vmx_nrxqueues)
2385 			rxr = &rxq->vxrxq_cmd_ring[0];
2386 		else
2387 			rxr = &rxq->vxrxq_cmd_ring[1];
2388 		rxd = &rxr->vxrxr_rxd[idx];
2389 
2390 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2391 		KASSERT(m != NULL);
2392 
2393 		/*
2394 		 * The host may skip descriptors. We detect this when this
2395 		 * descriptor does not match the previous fill index. Catch
2396 		 * up with the host now.
2397 		 */
2398 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2399 			while (rxr->vxrxr_fill != idx) {
2400 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2401 				    rxr->vxrxr_gen;
2402 				vmxnet3_rxr_increment_fill(rxr);
2403 			}
2404 		}
2405 
2406 		if (rxcd->sop) {
2407 			/* start of frame w/o head buffer */
2408 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD);
2409 			/* start of frame not in ring 0 */
2410 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0]);
2411 			/* duplicate start of frame? */
2412 			KASSERT(m_head == NULL);
2413 
2414 			if (length == 0) {
2415 				/* Just ignore this descriptor. */
2416 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2417 				goto nextp;
2418 			}
2419 
2420 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2421 				if_statinc(ifp, if_iqdrops);
2422 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2423 				if (!rxcd->eop)
2424 					vmxnet3_rxq_discard_chain(rxq);
2425 				goto nextp;
2426 			}
2427 
2428 			m_set_rcvif(m, ifp);
2429 			m->m_pkthdr.len = m->m_len = length;
2430 			m->m_pkthdr.csum_flags = 0;
2431 			m_head = m_tail = m;
2432 
2433 		} else {
2434 			/* non start of frame w/o body buffer */
2435 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY);
2436 			/* frame not started? */
2437 			KASSERT(m_head != NULL);
2438 
2439 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2440 				if_statinc(ifp, if_iqdrops);
2441 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2442 				if (!rxcd->eop)
2443 					vmxnet3_rxq_discard_chain(rxq);
2444 				m_freem(m_head);
2445 				m_head = m_tail = NULL;
2446 				goto nextp;
2447 			}
2448 
2449 			m->m_len = length;
2450 			m_head->m_pkthdr.len += length;
2451 			m_tail->m_next = m;
2452 			m_tail = m;
2453 		}
2454 
2455 		if (rxcd->eop) {
2456 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2457 			m_head = m_tail = NULL;
2458 
2459 			/* Must recheck after dropping the Rx lock. */
2460 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2461 				break;
2462 		}
2463 
2464 nextp:
2465 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2466 			int qid = rxcd->qid;
2467 			bus_size_t r;
2468 
2469 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2470 			if (qid >= sc->vmx_nrxqueues) {
2471 				qid -= sc->vmx_nrxqueues;
2472 				r = VMXNET3_BAR0_RXH2(qid);
2473 			} else
2474 				r = VMXNET3_BAR0_RXH1(qid);
2475 			vmxnet3_write_bar0(sc, r, idx);
2476 		}
2477 	}
2478 
2479 	return more;
2480 }
2481 
2482 static inline void
2483 vmxnet3_sched_handle_queue(struct vmxnet3_softc *sc, struct vmxnet3_queue *vmxq)
2484 {
2485 
2486 	if (vmxq->vxq_workqueue) {
2487 		workqueue_enqueue(sc->vmx_queue_wq, &vmxq->vxq_wq_cookie,
2488 		    curcpu());
2489 	} else {
2490 		softint_schedule(vmxq->vxq_si);
2491 	}
2492 }
2493 
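/*
 * Interrupt handler used when events and queue 0 share a single vector:
 * process pending events and both rings of queue 0, and defer the rest
 * to softint/workqueue when the per-interrupt limits are exceeded.
 */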
2494 static int
2495 vmxnet3_legacy_intr(void *xsc)
2496 {
2497 	struct vmxnet3_softc *sc;
2498 	struct vmxnet3_rxqueue *rxq;
2499 	struct vmxnet3_txqueue *txq;
2500 	u_int txlimit, rxlimit;
2501 	bool txmore, rxmore;
2502 
2503 	sc = xsc;
2504 	rxq = &sc->vmx_queue[0].vxq_rxqueue;
2505 	txq = &sc->vmx_queue[0].vxq_txqueue;
2506 	txlimit = sc->vmx_tx_intr_process_limit;
2507 	rxlimit = sc->vmx_rx_intr_process_limit;
2508 
2509 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2510 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2511 			return (0);
2512 	}
2513 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2514 		vmxnet3_disable_all_intrs(sc);
2515 
2516 	if (sc->vmx_ds->event != 0)
2517 		vmxnet3_evintr(sc);
2518 
2519 	VMXNET3_RXQ_LOCK(rxq);
2520 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2521 	VMXNET3_RXQ_UNLOCK(rxq);
2522 
2523 	VMXNET3_TXQ_LOCK(txq);
2524 	txmore = vmxnet3_txq_eof(txq, txlimit);
2525 	VMXNET3_TXQ_UNLOCK(txq);
2526 
2527 	if (txmore || rxmore) {
2528 		vmxnet3_sched_handle_queue(sc, &sc->vmx_queue[0]);
2529 	} else {
2530 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2531 		vmxnet3_enable_all_intrs(sc);
2532 	}
2533 	return (1);
2534 }
2535 
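/*
 * Per-queue (MSI-X) interrupt handler: mask the queue's vector if the
 * driver is responsible for masking, process the Tx and Rx rings up to
 * the interrupt limits, then either schedule further processing in
 * softint/workqueue or re-enable the vector.
 */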
2536 static int
2537 vmxnet3_txrxq_intr(void *xvmxq)
2538 {
2539 	struct vmxnet3_softc *sc;
2540 	struct vmxnet3_queue *vmxq;
2541 	struct vmxnet3_txqueue *txq;
2542 	struct vmxnet3_rxqueue *rxq;
2543 	u_int txlimit, rxlimit;
2544 	bool txmore, rxmore;
2545 
2546 	vmxq = xvmxq;
2547 	txq = &vmxq->vxq_txqueue;
2548 	rxq = &vmxq->vxq_rxqueue;
2549 	sc = txq->vxtxq_sc;
2550 	txlimit = sc->vmx_tx_intr_process_limit;
2551 	rxlimit = sc->vmx_rx_intr_process_limit;
2552 	vmxq->vxq_workqueue = sc->vmx_txrx_workqueue;
2553 
2554 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2555 		vmxnet3_disable_intr(sc, vmxq->vxq_intr_idx);
2556 
2557 	VMXNET3_TXQ_LOCK(txq);
2558 	txq->vxtxq_intr.ev_count++;
2559 	txmore = vmxnet3_txq_eof(txq, txlimit);
2560 	VMXNET3_TXQ_UNLOCK(txq);
2561 
2562 	VMXNET3_RXQ_LOCK(rxq);
2563 	rxq->vxrxq_intr.ev_count++;
2564 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2565 	VMXNET3_RXQ_UNLOCK(rxq);
2566 
2567 	if (txmore || rxmore) {
2568 		vmxnet3_sched_handle_queue(sc, vmxq);
2569 	} else {
2570 		/* for ALTQ */
2571 		if (vmxq->vxq_id == 0)
2572 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2573 		softint_schedule(txq->vxtxq_si);
2574 
2575 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2576 	}
2577 
2578 	return (1);
2579 }
2580 
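/*
 * Deferred (softint or workqueue) handler for one queue pair: run Tx
 * and Rx completion processing with the larger deferred limits,
 * reschedule while work remains, otherwise re-enable the queue's
 * interrupt.
 */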
2581 static void
2582 vmxnet3_handle_queue(void *xvmxq)
2583 {
2584 	struct vmxnet3_softc *sc;
2585 	struct vmxnet3_queue *vmxq;
2586 	struct vmxnet3_txqueue *txq;
2587 	struct vmxnet3_rxqueue *rxq;
2588 	u_int txlimit, rxlimit;
2589 	bool txmore, rxmore;
2590 
2591 	vmxq = xvmxq;
2592 	txq = &vmxq->vxq_txqueue;
2593 	rxq = &vmxq->vxq_rxqueue;
2594 	sc = txq->vxtxq_sc;
2595 	txlimit = sc->vmx_tx_process_limit;
2596 	rxlimit = sc->vmx_rx_process_limit;
2597 
2598 	VMXNET3_TXQ_LOCK(txq);
2599 	txq->vxtxq_defer.ev_count++;
2600 	txmore = vmxnet3_txq_eof(txq, txlimit);
2601 	if (txmore)
2602 		txq->vxtxq_deferreq.ev_count++;
2603 	/* for ALTQ */
2604 	if (vmxq->vxq_id == 0)
2605 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2606 	softint_schedule(txq->vxtxq_si);
2607 	VMXNET3_TXQ_UNLOCK(txq);
2608 
2609 	VMXNET3_RXQ_LOCK(rxq);
2610 	rxq->vxrxq_defer.ev_count++;
2611 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2612 	if (rxmore)
2613 		rxq->vxrxq_deferreq.ev_count++;
2614 	VMXNET3_RXQ_UNLOCK(rxq);
2615 
2616 	if (txmore || rxmore)
2617 		vmxnet3_sched_handle_queue(sc, vmxq);
2618 	else
2619 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2620 }
2621 
2622 static void
2623 vmxnet3_handle_queue_work(struct work *wk, void *context)
2624 {
2625 	struct vmxnet3_queue *vmxq;
2626 
2627 	vmxq = container_of(wk, struct vmxnet3_queue, vxq_wq_cookie);
2628 	vmxnet3_handle_queue(vmxq);
2629 }
2630 
2631 static int
2632 vmxnet3_event_intr(void *xsc)
2633 {
2634 	struct vmxnet3_softc *sc;
2635 
2636 	sc = xsc;
2637 
2638 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2639 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2640 
2641 	sc->vmx_event_intr.ev_count++;
2642 
2643 	if (sc->vmx_ds->event != 0)
2644 		vmxnet3_evintr(sc);
2645 
2646 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2647 
2648 	return (1);
2649 }
2650 
2651 static void
2652 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2653 {
2654 	struct vmxnet3_txring *txr;
2655 	struct vmxnet3_txbuf *txb;
2656 	u_int i;
2657 
2658 	txr = &txq->vxtxq_cmd_ring;
2659 
2660 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2661 		txb = &txr->vxtxr_txbuf[i];
2662 
2663 		if (txb->vtxb_m == NULL)
2664 			continue;
2665 
2666 		bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2667 		    0, txb->vtxb_dmamap->dm_mapsize,
2668 		    BUS_DMASYNC_POSTWRITE);
2669 		bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2670 		m_freem(txb->vtxb_m);
2671 		txb->vtxb_m = NULL;
2672 	}
2673 }
2674 
2675 static void
2676 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2677 {
2678 	struct vmxnet3_rxring *rxr;
2679 	struct vmxnet3_rxbuf *rxb;
2680 	u_int i, j;
2681 
2682 	if (rxq->vxrxq_mhead != NULL) {
2683 		m_freem(rxq->vxrxq_mhead);
2684 		rxq->vxrxq_mhead = NULL;
2685 		rxq->vxrxq_mtail = NULL;
2686 	}
2687 
2688 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2689 		rxr = &rxq->vxrxq_cmd_ring[i];
2690 
2691 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2692 			rxb = &rxr->vxrxr_rxbuf[j];
2693 
2694 			if (rxb->vrxb_m == NULL)
2695 				continue;
2696 
2697 			bus_dmamap_sync(sc->vmx_dmat, rxb->vrxb_dmamap,
2698 			    0, rxb->vrxb_dmamap->dm_mapsize,
2699 			    BUS_DMASYNC_POSTREAD);
2700 			bus_dmamap_unload(sc->vmx_dmat, rxb->vrxb_dmamap);
2701 			m_freem(rxb->vrxb_m);
2702 			rxb->vrxb_m = NULL;
2703 		}
2704 	}
2705 }
2706 
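/*
 * Take and release every queue lock once so that any queue handler
 * still running has drained before the rings are torn down.
 */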
2707 static void
2708 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2709 {
2710 	struct vmxnet3_rxqueue *rxq;
2711 	struct vmxnet3_txqueue *txq;
2712 	int i;
2713 
2714 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2715 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
2716 		VMXNET3_RXQ_LOCK(rxq);
2717 		VMXNET3_RXQ_UNLOCK(rxq);
2718 	}
2719 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2720 		txq = &sc->vmx_queue[i].vxq_txqueue;
2721 		VMXNET3_TXQ_LOCK(txq);
2722 		VMXNET3_TXQ_UNLOCK(txq);
2723 	}
2724 }
2725 
2726 static void
2727 vmxnet3_stop_locked(struct vmxnet3_softc *sc)
2728 {
2729 	struct ifnet *ifp;
2730 	int q;
2731 
2732 	ifp = &sc->vmx_ethercom.ec_if;
2733 	VMXNET3_CORE_LOCK_ASSERT(sc);
2734 
2735 	ifp->if_flags &= ~IFF_RUNNING;
2736 	sc->vmx_link_active = 0;
2737 	callout_stop(&sc->vmx_tick);
2738 
2739 	/* Disable interrupts. */
2740 	vmxnet3_disable_all_intrs(sc);
2741 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2742 
2743 	vmxnet3_stop_rendezvous(sc);
2744 
2745 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2746 		vmxnet3_txstop(sc, &sc->vmx_queue[q].vxq_txqueue);
2747 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2748 		vmxnet3_rxstop(sc, &sc->vmx_queue[q].vxq_rxqueue);
2749 
2750 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2751 }
2752 
2753 static void
2754 vmxnet3_stop(struct ifnet *ifp, int disable)
2755 {
2756 	struct vmxnet3_softc *sc = ifp->if_softc;
2757 
2758 	VMXNET3_CORE_LOCK(sc);
2759 	vmxnet3_stop_locked(sc);
2760 	VMXNET3_CORE_UNLOCK(sc);
2761 }
2762 
2763 static void
2764 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2765 {
2766 	struct vmxnet3_txring *txr;
2767 	struct vmxnet3_comp_ring *txc;
2768 
2769 	txr = &txq->vxtxq_cmd_ring;
2770 	txr->vxtxr_head = 0;
2771 	txr->vxtxr_next = 0;
2772 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2773 	memset(txr->vxtxr_txd, 0,
2774 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2775 
2776 	txc = &txq->vxtxq_comp_ring;
2777 	txc->vxcr_next = 0;
2778 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2779 	memset(txc->vxcr_u.txcd, 0,
2780 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2781 }
2782 
2783 static int
2784 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2785 {
2786 	struct vmxnet3_rxring *rxr;
2787 	struct vmxnet3_comp_ring *rxc;
2788 	u_int i, populate, idx;
2789 	int error;
2790 
2791 	/* LRO and jumbo frames are not supported yet */
2792 	populate = 1;
2793 
2794 	for (i = 0; i < populate; i++) {
2795 		rxr = &rxq->vxrxq_cmd_ring[i];
2796 		rxr->vxrxr_fill = 0;
2797 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2798 		memset(rxr->vxrxr_rxd, 0,
2799 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2800 
2801 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2802 			error = vmxnet3_newbuf(sc, rxq, rxr);
2803 			if (error)
2804 				return (error);
2805 		}
2806 	}
2807 
2808 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2809 		rxr = &rxq->vxrxq_cmd_ring[i];
2810 		rxr->vxrxr_fill = 0;
2811 		rxr->vxrxr_gen = 0;
2812 		memset(rxr->vxrxr_rxd, 0,
2813 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2814 	}
2815 
2816 	rxc = &rxq->vxrxq_comp_ring;
2817 	rxc->vxcr_next = 0;
2818 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2819 	memset(rxc->vxcr_u.rxcd, 0,
2820 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2821 
2822 	return (0);
2823 }
2824 
2825 static int
2826 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2827 {
2828 	device_t dev;
2829 	int q, error;
2830 	dev = sc->vmx_dev;
2831 
2832 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2833 		vmxnet3_txinit(sc, &sc->vmx_queue[q].vxq_txqueue);
2834 
2835 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2836 		error = vmxnet3_rxinit(sc, &sc->vmx_queue[q].vxq_rxqueue);
2837 		if (error) {
2838 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2839 			return (error);
2840 		}
2841 	}
2842 
2843 	return (0);
2844 }
2845 
2846 static int
2847 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2848 {
2849 	int q;
2850 
2851 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2852 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2853 		return (1);
2854 	}
2855 
2856 	/* Reset the Rx queue heads. */
2857 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2858 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2859 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2860 	}
2861 
2862 	return (0);
2863 }
2864 
2865 static void
2866 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2867 {
2868 
2869 	vmxnet3_set_rxfilter(sc);
2870 
2871 	memset(sc->vmx_ds->vlan_filter, 0, sizeof(sc->vmx_ds->vlan_filter));
2872 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2873 }
2874 
2875 static int
2876 vmxnet3_reinit(struct vmxnet3_softc *sc)
2877 {
2878 
2879 	vmxnet3_set_lladdr(sc);
2880 	vmxnet3_reinit_shared_data(sc);
2881 
2882 	if (vmxnet3_reinit_queues(sc) != 0)
2883 		return (ENXIO);
2884 
2885 	if (vmxnet3_enable_device(sc) != 0)
2886 		return (ENXIO);
2887 
2888 	vmxnet3_reinit_rxfilters(sc);
2889 
2890 	return (0);
2891 }
2892 
2893 static int
2894 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2895 {
2896 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
2897 	int error;
2898 
2899 	vmxnet3_stop_locked(sc);
2900 
2901 	error = vmxnet3_reinit(sc);
2902 	if (error) {
2903 		vmxnet3_stop_locked(sc);
2904 		return (error);
2905 	}
2906 
2907 	ifp->if_flags |= IFF_RUNNING;
2908 	vmxnet3_if_link_status(sc);
2909 
2910 	vmxnet3_enable_all_intrs(sc);
2911 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2912 
2913 	return (0);
2914 }
2915 
2916 static int
2917 vmxnet3_init(struct ifnet *ifp)
2918 {
2919 	struct vmxnet3_softc *sc = ifp->if_softc;
2920 	int error;
2921 
2922 	VMXNET3_CORE_LOCK(sc);
2923 	error = vmxnet3_init_locked(sc);
2924 	VMXNET3_CORE_UNLOCK(sc);
2925 
2926 	return (error);
2927 }
2928 
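/*
 * Compute the offload context for a checksum/TSO packet: "start" is the
 * offset of the L4 header and "csum_start" the offset of its checksum
 * field.  For TSO the TCP pseudo-header checksum is primed here and the
 * TCP header length is added to "start".  The mbuf is consumed on
 * failure.
 */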
2929 static int
2930 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2931     int *start, int *csum_start)
2932 {
2933 	struct ether_header *eh;
2934 	struct mbuf *mp;
2935 	int offset, csum_off, iphl, offp;
2936 	bool v4;
2937 
2938 	eh = mtod(m, struct ether_header *);
2939 	switch (htons(eh->ether_type)) {
2940 	case ETHERTYPE_IP:
2941 	case ETHERTYPE_IPV6:
2942 		offset = ETHER_HDR_LEN;
2943 		break;
2944 	case ETHERTYPE_VLAN:
2945 		offset = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2946 		break;
2947 	default:
2948 		m_freem(m);
2949 		return (EINVAL);
2950 	}
2951 
2952 	if ((m->m_pkthdr.csum_flags &
2953 	    (M_CSUM_TSOv4 | M_CSUM_UDPv4 | M_CSUM_TCPv4)) != 0) {
2954 		iphl = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
2955 		v4 = true;
2956 	} else {
2957 		iphl = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
2958 		v4 = false;
2959 	}
2960 	*start = offset + iphl;
2961 
2962 	if (m->m_pkthdr.csum_flags &
2963 	    (M_CSUM_TCPv4 | M_CSUM_TCPv6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2964 		csum_off = offsetof(struct tcphdr, th_sum);
2965 	} else {
2966 		csum_off = offsetof(struct udphdr, uh_sum);
2967 	}
2968 
2969 	*csum_start = *start + csum_off;
2970 	mp = m_pulldown(m, 0, *csum_start + 2, &offp);
2971 	if (!mp) {
2972 		/* m is already freed */
2973 		return ENOBUFS;
2974 	}
2975 
2976 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2977 		struct tcphdr *tcp;
2978 
2979 		txq->vxtxq_stats.vmtxs_tso++;
2980 		tcp = (void *)(mtod(mp, char *) + offp + *start);
2981 
2982 		if (v4) {
2983 			struct ip *ip;
2984 
2985 			ip = (void *)(mtod(mp, char *) + offp + offset);
2986 			tcp->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
2987 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2988 		} else {
2989 			struct ip6_hdr *ip6;
2990 
2991 			ip6 = (void *)(mtod(mp, char *) + offp + offset);
2992 			tcp->th_sum = in6_cksum_phdr(&ip6->ip6_src,
2993 			    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
2994 		}
2995 
2996 		/*
2997 		 * For TSO, the size of the protocol header is also
2998 		 * included in the descriptor header size.
2999 		 */
3000 		*start += (tcp->th_off << 2);
3001 	} else
3002 		txq->vxtxq_stats.vmtxs_csum++;
3003 
3004 	return (0);
3005 }
3006 
3007 static int
3008 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
3009     bus_dmamap_t dmap)
3010 {
3011 	struct mbuf *m;
3012 	bus_dma_tag_t tag;
3013 	int error;
3014 
3015 	m = *m0;
3016 	tag = txq->vxtxq_sc->vmx_dmat;
3017 
3018 	error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3019 	if (error == 0 || error != EFBIG)
3020 		return (error);
3021 
3022 	m = m_defrag(m, M_NOWAIT);
3023 	if (m != NULL) {
3024 		*m0 = m;
3025 		error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3026 	} else
3027 		error = ENOBUFS;
3028 
3029 	if (error) {
3030 		m_freem(*m0);
3031 		*m0 = NULL;
3032 		txq->vxtxq_defrag_failed.ev_count++;
3033 	} else
3034 		txq->vxtxq_defragged.ev_count++;
3035 
3036 	return (error);
3037 }
3038 
3039 static void
3040 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
3041 {
3042 
3043 	bus_dmamap_unload(txq->vxtxq_sc->vmx_dmat, dmap);
3044 }
3045 
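/*
 * Enqueue one mbuf chain on the Tx ring: load it for DMA (defragmenting
 * if needed), fill one descriptor per segment, set the VLAN and
 * checksum/TSO fields in the SOP descriptor, flip the SOP generation
 * bit to pass ownership to the device, and ring the Tx doorbell once
 * enough descriptors are pending.
 */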
3046 static int
3047 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
3048 {
3049 	struct vmxnet3_softc *sc;
3050 	struct vmxnet3_txring *txr;
3051 	struct vmxnet3_txdesc *txd, *sop;
3052 	struct mbuf *m;
3053 	bus_dmamap_t dmap;
3054 	bus_dma_segment_t *segs;
3055 	int i, gen, start, csum_start, nsegs, error;
3056 
3057 	sc = txq->vxtxq_sc;
3058 	start = 0;
3059 	txd = NULL;
3060 	txr = &txq->vxtxq_cmd_ring;
3061 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
3062 	csum_start = 0; /* XXX: silence GCC uninitialized warning */
3063 
3064 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap);
3065 	if (error)
3066 		return (error);
3067 
3068 	nsegs = dmap->dm_nsegs;
3069 	segs = dmap->dm_segs;
3070 
3071 	m = *m0;
3072 	KASSERT(m->m_flags & M_PKTHDR);
3073 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS);
3074 
3075 	if (vmxnet3_txring_avail(txr) < nsegs) {
3076 		txq->vxtxq_stats.vmtxs_full++;
3077 		vmxnet3_txq_unload_mbuf(txq, dmap);
3078 		return (ENOSPC);
3079 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
3080 		error = vmxnet3_txq_offload_ctx(txq, m, &start, &csum_start);
3081 		if (error) {
3082 			/* m is already freed */
3083 			txq->vxtxq_stats.vmtxs_offload_failed++;
3084 			vmxnet3_txq_unload_mbuf(txq, dmap);
3085 			*m0 = NULL;
3086 			return (error);
3087 		}
3088 	}
3089 
3090 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
3091 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
3092 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now */
3093 
3094 	for (i = 0; i < nsegs; i++) {
3095 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
3096 
3097 		txd->addr = segs[i].ds_addr;
3098 		txd->len = segs[i].ds_len;
3099 		txd->gen = gen;
3100 		txd->dtype = 0;
3101 		txd->offload_mode = VMXNET3_OM_NONE;
3102 		txd->offload_pos = 0;
3103 		txd->hlen = 0;
3104 		txd->eop = 0;
3105 		txd->compreq = 0;
3106 		txd->vtag_mode = 0;
3107 		txd->vtag = 0;
3108 
3109 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
3110 			txr->vxtxr_head = 0;
3111 			txr->vxtxr_gen ^= 1;
3112 		}
3113 		gen = txr->vxtxr_gen;
3114 	}
3115 	txd->eop = 1;
3116 	txd->compreq = 1;
3117 
3118 	if (vlan_has_tag(m)) {
3119 		sop->vtag_mode = 1;
3120 		sop->vtag = vlan_get_tag(m);
3121 	}
3122 
3123 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3124 		sop->offload_mode = VMXNET3_OM_TSO;
3125 		sop->hlen = start;
3126 		sop->offload_pos = m->m_pkthdr.segsz;
3127 	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
3128 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
3129 		sop->offload_mode = VMXNET3_OM_CSUM;
3130 		sop->hlen = start;
3131 		sop->offload_pos = csum_start;
3132 	}
3133 
3134 	/* Finally, change the ownership. */
3135 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
3136 	sop->gen ^= 1;
3137 
3138 	txq->vxtxq_ts->npending += nsegs;
3139 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
3140 		struct vmxnet3_queue *vmxq;
3141 		vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3142 		txq->vxtxq_ts->npending = 0;
3143 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(vmxq->vxq_id),
3144 		    txr->vxtxr_head);
3145 	}
3146 
3147 	return (0);
3148 }
3149 
3150 #define VMXNET3_TX_START 1
3151 #define VMXNET3_TX_TRANSMIT 2
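/*
 * Common transmit loop for the if_start (VMXNET3_TX_START) and
 * if_transmit (VMXNET3_TX_TRANSMIT) paths: dequeue packets from the
 * corresponding queue while ring space allows, encapsulate them, and
 * arm the Tx watchdog if anything was queued.
 */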
3152 static inline void
3153 vmxnet3_tx_common_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq, int txtype)
3154 {
3155 	struct vmxnet3_softc *sc;
3156 	struct vmxnet3_txring *txr;
3157 	struct mbuf *m_head;
3158 	int tx;
3159 
3160 	sc = ifp->if_softc;
3161 	txr = &txq->vxtxq_cmd_ring;
3162 	tx = 0;
3163 
3164 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3165 
3166 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3167 	    sc->vmx_link_active == 0)
3168 		return;
3169 
3170 	for (;;) {
3171 		if (txtype == VMXNET3_TX_START)
3172 			IFQ_POLL(&ifp->if_snd, m_head);
3173 		else
3174 			m_head = pcq_peek(txq->vxtxq_interq);
3175 		if (m_head == NULL)
3176 			break;
3177 
3178 		if (vmxnet3_txring_avail(txr) < VMXNET3_TX_MAXSEGS)
3179 			break;
3180 
3181 		if (txtype == VMXNET3_TX_START)
3182 			IFQ_DEQUEUE(&ifp->if_snd, m_head);
3183 		else
3184 			m_head = pcq_get(txq->vxtxq_interq);
3185 		if (m_head == NULL)
3186 			break;
3187 
3188 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3189 			if (m_head != NULL)
3190 				m_freem(m_head);
3191 			break;
3192 		}
3193 
3194 		tx++;
3195 		bpf_mtap(ifp, m_head, BPF_D_OUT);
3196 	}
3197 
3198 	if (tx > 0)
3199 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3200 }
3201 
3202 static void
3203 vmxnet3_start_locked(struct ifnet *ifp)
3204 {
3205 	struct vmxnet3_softc *sc;
3206 	struct vmxnet3_txqueue *txq;
3207 
3208 	sc = ifp->if_softc;
3209 	txq = &sc->vmx_queue[0].vxq_txqueue;
3210 
3211 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_START);
3212 }
3213 
3214 void
3215 vmxnet3_start(struct ifnet *ifp)
3216 {
3217 	struct vmxnet3_softc *sc;
3218 	struct vmxnet3_txqueue *txq;
3219 
3220 	sc = ifp->if_softc;
3221 	txq = &sc->vmx_queue[0].vxq_txqueue;
3222 
3223 	VMXNET3_TXQ_LOCK(txq);
3224 	vmxnet3_start_locked(ifp);
3225 	VMXNET3_TXQ_UNLOCK(txq);
3226 }
3227 
3228 static int
3229 vmxnet3_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
3230 {
3231 	struct vmxnet3_softc *sc;
3232 	u_int cpuid;
3233 
3234 	sc = ifp->if_softc;
3235 	cpuid = cpu_index(curcpu());
3236 	/*
3237 	 * Future work:
3238 	 * We should select the txqueue so that the load is evened out even
3239 	 * when ncpu differs from sc->vmx_ntxqueues. Currently the load is
3240 	 * uneven: with six CPUs and four txqueues, vmx_queue[0] and
3241 	 * vmx_queue[1] carry more load than vmx_queue[2] and vmx_queue[3]
3242 	 * because CPU#4 always uses vmx_queue[0] and CPU#5 always uses
3243 	 * vmx_queue[1].
3244 	 * Furthermore, we should not pick the txqueue at random, to avoid
3245 	 * reordering; the mbuf's flow information should be used instead.
3246 	 */
3247 	return cpuid % sc->vmx_ntxqueues;
3248 }
3249 
3250 static void
3251 vmxnet3_transmit_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq)
3252 {
3253 
3254 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_TRANSMIT);
3255 }
3256 
3257 static int
3258 vmxnet3_transmit(struct ifnet *ifp, struct mbuf *m)
3259 {
3260 	struct vmxnet3_softc *sc;
3261 	struct vmxnet3_txqueue *txq;
3262 	int qid;
3263 
3264 	qid = vmxnet3_select_txqueue(ifp, m);
3265 	sc = ifp->if_softc;
3266 	txq = &sc->vmx_queue[qid].vxq_txqueue;
3267 
3268 	if (__predict_false(!pcq_put(txq->vxtxq_interq, m))) {
3269 		VMXNET3_TXQ_LOCK(txq);
3270 		txq->vxtxq_pcqdrop.ev_count++;
3271 		VMXNET3_TXQ_UNLOCK(txq);
3272 		m_freem(m);
3273 		return ENOBUFS;
3274 	}
3275 
3276 	if (VMXNET3_TXQ_TRYLOCK(txq)) {
3277 		vmxnet3_transmit_locked(ifp, txq);
3278 		VMXNET3_TXQ_UNLOCK(txq);
3279 	} else {
3280 		kpreempt_disable();
3281 		softint_schedule(txq->vxtxq_si);
3282 		kpreempt_enable();
3283 	}
3284 
3285 	return 0;
3286 }
3287 
3288 static void
3289 vmxnet3_deferred_transmit(void *arg)
3290 {
3291 	struct vmxnet3_txqueue *txq = arg;
3292 	struct vmxnet3_softc *sc = txq->vxtxq_sc;
3293 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3294 
3295 	VMXNET3_TXQ_LOCK(txq);
3296 	txq->vxtxq_transmitdef.ev_count++;
3297 	if (pcq_peek(txq->vxtxq_interq) != NULL)
3298 		vmxnet3_transmit_locked(ifp, txq);
3299 	VMXNET3_TXQ_UNLOCK(txq);
3300 }
3301 
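/*
 * Program the receive filter: always accept unicast to our station
 * address and broadcast, build the multicast table for up to
 * VMXNET3_MULTICAST_MAX addresses, fall back to all-multicast (plus
 * promiscuous if requested) otherwise, then push the filter table and
 * Rx mode to the device.
 */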
3302 static void
3303 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3304 {
3305 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3306 	struct ethercom *ec = &sc->vmx_ethercom;
3307 	struct vmxnet3_driver_shared *ds = sc->vmx_ds;
3308 	struct ether_multi *enm;
3309 	struct ether_multistep step;
3310 	u_int mode;
3311 	uint8_t *p;
3312 
3313 	ds->mcast_tablelen = 0;
3314 	ETHER_LOCK(ec);
3315 	CLR(ec->ec_flags, ETHER_F_ALLMULTI);
3316 	ETHER_UNLOCK(ec);
3317 
3318 	/*
3319 	 * Always accept broadcast frames.
3320 	 * Always accept frames destined to our station address.
3321 	 */
3322 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
3323 
3324 	ETHER_LOCK(ec);
3325 	if (ISSET(ifp->if_flags, IFF_PROMISC) ||
3326 	    ec->ec_multicnt > VMXNET3_MULTICAST_MAX)
3327 		goto allmulti;
3328 
3329 	p = sc->vmx_mcast;
3330 	ETHER_FIRST_MULTI(step, ec, enm);
3331 	while (enm != NULL) {
3332 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
3333 			/*
3334 			 * We must listen to a range of multicast addresses.
3335 			 * For now, just accept all multicasts, rather than
3336 			 * trying to set only those filter bits needed to match
3337 			 * the range.  (At this time, the only use of address
3338 			 * ranges is for IP multicast routing, for which the
3339 			 * range is big enough to require all bits set.)
3340 			 */
3341 			goto allmulti;
3342 		}
3343 		memcpy(p, enm->enm_addrlo, ETHER_ADDR_LEN);
3344 
3345 		p += ETHER_ADDR_LEN;
3346 
3347 		ETHER_NEXT_MULTI(step, enm);
3348 	}
3349 
3350 	if (ec->ec_multicnt > 0) {
3351 		SET(mode, VMXNET3_RXMODE_MCAST);
3352 		ds->mcast_tablelen = p - sc->vmx_mcast;
3353 	}
3354 	ETHER_UNLOCK(ec);
3355 
3356 	goto setit;
3357 
3358 allmulti:
3359 	SET(ec->ec_flags, ETHER_F_ALLMULTI);
3360 	ETHER_UNLOCK(ec);
3361 	SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
3362 	if (ifp->if_flags & IFF_PROMISC)
3363 		SET(mode, VMXNET3_RXMODE_PROMISC);
3364 
3365 setit:
3366 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3367 	ds->rxmode = mode;
3368 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3369 }
3370 
3371 static int
3372 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
3373 {
3374 	struct vmxnet3_softc *sc = ifp->if_softc;
3375 	struct ifreq *ifr = (struct ifreq *)data;
3376 	int s, error = 0;
3377 
3378 	switch (cmd) {
3379 	case SIOCSIFMTU: {
3380 		int nmtu = ifr->ifr_mtu;
3381 
3382 		if (nmtu < VMXNET3_MIN_MTU || nmtu > VMXNET3_MAX_MTU) {
3383 			error = EINVAL;
3384 			break;
3385 		}
3386 		if (ifp->if_mtu != (uint64_t)nmtu) {
3387 			s = splnet();
3388 			error = ether_ioctl(ifp, cmd, data);
3389 			splx(s);
3390 			if (error == ENETRESET)
3391 				error = vmxnet3_init(ifp);
3392 		}
3393 		break;
3394 	}
3395 
3396 	default:
3397 		s = splnet();
3398 		error = ether_ioctl(ifp, cmd, data);
3399 		splx(s);
3400 	}
3401 
3402 	if (error == ENETRESET) {
3403 		VMXNET3_CORE_LOCK(sc);
3404 		if (ifp->if_flags & IFF_RUNNING)
3405 			vmxnet3_set_rxfilter(sc);
3406 		VMXNET3_CORE_UNLOCK(sc);
3407 		error = 0;
3408 	}
3409 
3410 	return error;
3411 }
3412 
3413 static int
3414 vmxnet3_ifflags_cb(struct ethercom *ec)
3415 {
3416 	struct vmxnet3_softc *sc;
3417 
3418 	sc = ec->ec_if.if_softc;
3419 
3420 	VMXNET3_CORE_LOCK(sc);
3421 	vmxnet3_set_rxfilter(sc);
3422 	VMXNET3_CORE_UNLOCK(sc);
3423 
3424 	vmxnet3_if_link_status(sc);
3425 
3426 	return 0;
3427 }
3428 
3429 static int
3430 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3431 {
3432 	struct vmxnet3_softc *sc;
3433 	struct vmxnet3_queue *vmxq;
3434 
3435 	sc = txq->vxtxq_sc;
3436 	vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3437 
3438 	VMXNET3_TXQ_LOCK(txq);
3439 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3440 		VMXNET3_TXQ_UNLOCK(txq);
3441 		return (0);
3442 	}
3443 	txq->vxtxq_watchdogto.ev_count++;
3444 	VMXNET3_TXQ_UNLOCK(txq);
3445 
3446 	device_printf(sc->vmx_dev, "watchdog timeout on queue %d\n",
3447 	    vmxq->vxq_id);
3448 	return (1);
3449 }
3450 
3451 static void
3452 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3453 {
3454 
3455 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3456 }
3457 
3458 static void
3459 vmxnet3_tick(void *xsc)
3460 {
3461 	struct vmxnet3_softc *sc;
3462 	int i, timedout;
3463 
3464 	sc = xsc;
3465 	timedout = 0;
3466 
3467 	VMXNET3_CORE_LOCK(sc);
3468 
3469 	vmxnet3_refresh_host_stats(sc);
3470 
3471 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3472 		timedout |= vmxnet3_watchdog(&sc->vmx_queue[i].vxq_txqueue);
3473 
3474 	if (timedout != 0)
3475 		vmxnet3_init_locked(sc);
3476 	else
3477 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3478 
3479 	VMXNET3_CORE_UNLOCK(sc);
3480 }
3481 
3482 /*
3483  * update link state of ifnet and softc
3484  */
3485 static void
3486 vmxnet3_if_link_status(struct vmxnet3_softc *sc)
3487 {
3488 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3489 	u_int link;
3490 	bool up;
3491 
3492 	up = vmxnet3_cmd_link_status(ifp);
3493 	if (up) {
3494 		sc->vmx_link_active = 1;
3495 		link = LINK_STATE_UP;
3496 	} else {
3497 		sc->vmx_link_active = 0;
3498 		link = LINK_STATE_DOWN;
3499 	}
3500 
3501 	if_link_state_change(ifp, link);
3502 }
3503 
3504 /*
3505  * Check the vmx(4) link state via VMXNET3_CMD_GET_LINK and update
3506  * ifp->if_baudrate.  Returns:
3507  *       - true:  link up
3508  *       - false: link down
3509  */
3510 static bool
3511 vmxnet3_cmd_link_status(struct ifnet *ifp)
3512 {
3513 	struct vmxnet3_softc *sc = ifp->if_softc;
3514 	u_int x, speed;
3515 
3516 	x = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3517 	if ((x & 1) == 0)
3518 		return false;
3519 
3520 	speed = x >> 16;
3521 	ifp->if_baudrate = IF_Mbps(speed);
3522 	return true;
3523 }
3524 
3525 static void
3526 vmxnet3_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3527 {
3528 	bool up;
3529 
3530 	ifmr->ifm_status = IFM_AVALID;
3531 	ifmr->ifm_active = IFM_ETHER;
3532 
3533 	up = vmxnet3_cmd_link_status(ifp);
3534 	if (!up)
3535 		return;
3536 
3537 	ifmr->ifm_status |= IFM_ACTIVE;
3538 
3539 	if (ifp->if_baudrate >= IF_Gbps(10ULL))
3540 		ifmr->ifm_active |= IFM_10G_T;
3541 }
3542 
3543 static int
3544 vmxnet3_ifmedia_change(struct ifnet *ifp)
3545 {
3546 	return 0;
3547 }
3548 
3549 static void
3550 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3551 {
3552 	uint32_t ml, mh;
3553 
3554 	ml  = sc->vmx_lladdr[0];
3555 	ml |= sc->vmx_lladdr[1] << 8;
3556 	ml |= sc->vmx_lladdr[2] << 16;
3557 	ml |= sc->vmx_lladdr[3] << 24;
3558 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3559 
3560 	mh  = sc->vmx_lladdr[4];
3561 	mh |= sc->vmx_lladdr[5] << 8;
3562 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3563 }
3564 
3565 static void
3566 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3567 {
3568 	uint32_t ml, mh;
3569 
3570 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3571 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3572 
3573 	sc->vmx_lladdr[0] = ml;
3574 	sc->vmx_lladdr[1] = ml >> 8;
3575 	sc->vmx_lladdr[2] = ml >> 16;
3576 	sc->vmx_lladdr[3] = ml >> 24;
3577 	sc->vmx_lladdr[4] = mh;
3578 	sc->vmx_lladdr[5] = mh >> 8;
3579 }
3580 
3581 static void
3582 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3583 {
3584 	int i;
3585 
3586 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3587 	for (i = 0; i < sc->vmx_nintrs; i++)
3588 		vmxnet3_enable_intr(sc, i);
3589 }
3590 
3591 static void
3592 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3593 {
3594 	int i;
3595 
3596 	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3597 	for (i = 0; i < sc->vmx_nintrs; i++)
3598 		vmxnet3_disable_intr(sc, i);
3599 }
3600 
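/*
 * Allocate a single physically contiguous, zeroed DMA area: allocate
 * and map the memory, create and load a DMA map for it, and record its
 * bus address; partially constructed state is unwound on failure.
 */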
3601 static int
3602 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3603     struct vmxnet3_dma_alloc *dma)
3604 {
3605 	bus_dma_tag_t t = sc->vmx_dmat;
3606 	bus_dma_segment_t *segs = dma->dma_segs;
3607 	int n, error;
3608 
3609 	memset(dma, 0, sizeof(*dma));
3610 
3611 	error = bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT);
3612 	if (error) {
3613 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_alloc failed: %d\n", error);
3614 		goto fail1;
3615 	}
3616 	KASSERT(n == 1);
3617 
3618 	error = bus_dmamem_map(t, segs, 1, size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
3619 	if (error) {
3620 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_map failed: %d\n", error);
3621 		goto fail2;
3622 	}
3623 
3624 	error = bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
3625 	if (error) {
3626 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_create failed: %d\n", error);
3627 		goto fail3;
3628 	}
3629 
3630 	error = bus_dmamap_load(t, dma->dma_map, dma->dma_vaddr, size, NULL,
3631 	    BUS_DMA_NOWAIT);
3632 	if (error) {
3633 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_load failed: %d\n", error);
3634 		goto fail4;
3635 	}
3636 
3637 	memset(dma->dma_vaddr, 0, size);
3638 	dma->dma_paddr = DMAADDR(dma->dma_map);
3639 	dma->dma_size = size;
3640 
3641 	return (0);
3642 fail4:
3643 	bus_dmamap_destroy(t, dma->dma_map);
3644 fail3:
3645 	bus_dmamem_unmap(t, dma->dma_vaddr, size);
3646 fail2:
3647 	bus_dmamem_free(t, segs, 1);
3648 fail1:
3649 	return (error);
3650 }
3651 
3652 static void
3653 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3654 {
3655 	bus_dma_tag_t t = sc->vmx_dmat;
3656 
3657 	bus_dmamap_unload(t, dma->dma_map);
3658 	bus_dmamap_destroy(t, dma->dma_map);
3659 	bus_dmamem_unmap(t, dma->dma_vaddr, dma->dma_size);
3660 	bus_dmamem_free(t, dma->dma_segs, 1);
3661 
3662 	memset(dma, 0, sizeof(*dma));
3663 }
3664 
3665 MODULE(MODULE_CLASS_DRIVER, if_vmx, "pci");
3666 
3667 #ifdef _MODULE
3668 #include "ioconf.c"
3669 #endif
3670 
3671 static int
3672 if_vmx_modcmd(modcmd_t cmd, void *opaque)
3673 {
3674 	int error = 0;
3675 
3676 	switch (cmd) {
3677 	case MODULE_CMD_INIT:
3678 #ifdef _MODULE
3679 		error = config_init_component(cfdriver_ioconf_if_vmx,
3680 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3681 #endif
3682 		return error;
3683 	case MODULE_CMD_FINI:
3684 #ifdef _MODULE
3685 		error = config_fini_component(cfdriver_ioconf_if_vmx,
3686 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3687 #endif
3688 		return error;
3689 	default:
3690 		return ENOTTY;
3691 	}
3692 }
3693 
3694