xref: /netbsd-src/sys/dev/pci/if_vmx.c (revision 901e7e84758515fbf39dfc064cb0b45ab146d8b0)
1 /*	$NetBSD: if_vmx.c,v 1.12 2023/08/10 08:24:44 riastradh Exp $	*/
2 /*	$OpenBSD: if_vmx.c,v 1.16 2014/01/22 06:04:17 brad Exp $	*/
3 
4 /*
5  * Copyright (c) 2013 Tsubai Masanari
6  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __KERNEL_RCSID(0, "$NetBSD: if_vmx.c,v 1.12 2023/08/10 08:24:44 riastradh Exp $");
23 
24 #ifdef _KERNEL_OPT
25 #include "opt_if_vmx.h"
26 #endif
27 
28 #include <sys/param.h>
29 #include <sys/cpu.h>
30 #include <sys/kernel.h>
31 #include <sys/kmem.h>
32 #include <sys/bitops.h>
33 #include <sys/bus.h>
34 #include <sys/device.h>
35 #include <sys/mbuf.h>
36 #include <sys/module.h>
37 #include <sys/sockio.h>
38 #include <sys/pcq.h>
39 #include <sys/workqueue.h>
40 #include <sys/interrupt.h>
41 
42 #include <net/bpf.h>
43 #include <net/if.h>
44 #include <net/if_ether.h>
45 #include <net/if_media.h>
46 
47 #include <netinet/if_inarp.h>
48 #include <netinet/in_systm.h>	/* for <netinet/ip.h> */
49 #include <netinet/in.h>		/* for <netinet/ip.h> */
50 #include <netinet/ip.h>		/* for struct ip */
51 #include <netinet/ip6.h>	/* for struct ip6_hdr */
52 #include <netinet/tcp.h>	/* for struct tcphdr */
53 #include <netinet/udp.h>	/* for struct udphdr */
54 
55 #include <dev/pci/pcivar.h>
56 #include <dev/pci/pcireg.h>
57 #include <dev/pci/pcidevs.h>
58 
59 #include <dev/pci/if_vmxreg.h>
60 
61 #define VMXNET3_DRIVER_VERSION 0x00010000
62 
63 /*
64  * Max descriptors per Tx packet. We must limit the size of the
65  * any TSO packets based on the number of segments.
66  */
67 #define VMXNET3_TX_MAXSEGS		32
68 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
69 
70 /*
71  * Maximum support Tx segments size. The length field in the
72  * Tx descriptor is 14 bits.
73  */
74 #define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
75 
76 /*
77  * The maximum number of Rx segments we accept.
78  */
79 #define VMXNET3_MAX_RX_SEGS		0	/* no segments */
80 
81 /*
82  * Predetermined size of the multicast MACs filter table. If the
83  * number of multicast addresses exceeds this size, then the
84  * ALL_MULTI mode is use instead.
85  */
86 #define VMXNET3_MULTICAST_MAX		32
87 
88 /*
89  * Our Tx watchdog timeout.
90  */
91 #define VMXNET3_WATCHDOG_TIMEOUT	5
92 
93 /*
94  * Default value for vmx_intr_{rx,tx}_process_limit which is used for
95  * max number of packets to process for interrupt handler
96  */
97 #define VMXNET3_RX_INTR_PROCESS_LIMIT 0U
98 #define VMXNET3_TX_INTR_PROCESS_LIMIT 256
99 
100 /*
101  * Default value for vmx_{rx,tx}_process_limit which is used for
102  * max number of packets to process for deferred processing
103  */
104 #define VMXNET3_RX_PROCESS_LIMIT 256
105 #define VMXNET3_TX_PROCESS_LIMIT 256
106 
107 #define VMXNET3_WORKQUEUE_PRI PRI_SOFTNET
108 
109 /*
110  * IP protocols that we can perform Tx checksum offloading of.
111  */
112 #define VMXNET3_CSUM_OFFLOAD \
113     (M_CSUM_TCPv4 | M_CSUM_UDPv4)
114 #define VMXNET3_CSUM_OFFLOAD_IPV6 \
115     (M_CSUM_TCPv6 | M_CSUM_UDPv6)
116 
117 #define VMXNET3_CSUM_ALL_OFFLOAD \
118     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)
119 
120 #define VMXNET3_RXRINGS_PERQ 2
121 
122 #define VMXNET3_CORE_LOCK(_sc)		mutex_enter((_sc)->vmx_mtx)
123 #define VMXNET3_CORE_UNLOCK(_sc)	mutex_exit((_sc)->vmx_mtx)
124 #define VMXNET3_CORE_LOCK_ASSERT(_sc)	mutex_owned((_sc)->vmx_mtx)
125 
126 #define VMXNET3_RXQ_LOCK(_rxq)		mutex_enter((_rxq)->vxrxq_mtx)
127 #define VMXNET3_RXQ_UNLOCK(_rxq)	mutex_exit((_rxq)->vxrxq_mtx)
128 #define VMXNET3_RXQ_LOCK_ASSERT(_rxq)		\
129     mutex_owned((_rxq)->vxrxq_mtx)
130 
131 #define VMXNET3_TXQ_LOCK(_txq)		mutex_enter((_txq)->vxtxq_mtx)
132 #define VMXNET3_TXQ_TRYLOCK(_txq)	mutex_tryenter((_txq)->vxtxq_mtx)
133 #define VMXNET3_TXQ_UNLOCK(_txq)	mutex_exit((_txq)->vxtxq_mtx)
134 #define VMXNET3_TXQ_LOCK_ASSERT(_txq)		\
135     mutex_owned((_txq)->vxtxq_mtx)
136 
137 struct vmxnet3_dma_alloc {
138 	bus_addr_t dma_paddr;
139 	void *dma_vaddr;
140 	bus_dmamap_t dma_map;
141 	bus_size_t dma_size;
142 	bus_dma_segment_t dma_segs[1];
143 };
144 
145 struct vmxnet3_txbuf {
146 	bus_dmamap_t vtxb_dmamap;
147 	struct mbuf *vtxb_m;
148 };
149 
150 struct vmxnet3_txring {
151 	struct vmxnet3_txbuf *vxtxr_txbuf;
152 	struct vmxnet3_txdesc *vxtxr_txd;
153 	u_int vxtxr_head;
154 	u_int vxtxr_next;
155 	u_int vxtxr_ndesc;
156 	int vxtxr_gen;
157 	struct vmxnet3_dma_alloc vxtxr_dma;
158 };
159 
160 struct vmxnet3_rxbuf {
161 	bus_dmamap_t vrxb_dmamap;
162 	struct mbuf *vrxb_m;
163 };
164 
165 struct vmxnet3_rxring {
166 	struct vmxnet3_rxbuf *vxrxr_rxbuf;
167 	struct vmxnet3_rxdesc *vxrxr_rxd;
168 	u_int vxrxr_fill;
169 	u_int vxrxr_ndesc;
170 	int vxrxr_gen;
171 	int vxrxr_rid;
172 	struct vmxnet3_dma_alloc vxrxr_dma;
173 	bus_dmamap_t vxrxr_spare_dmap;
174 };
175 
176 struct vmxnet3_comp_ring {
177 	union {
178 		struct vmxnet3_txcompdesc *txcd;
179 		struct vmxnet3_rxcompdesc *rxcd;
180 	} vxcr_u;
181 	u_int vxcr_next;
182 	u_int vxcr_ndesc;
183 	int vxcr_gen;
184 	struct vmxnet3_dma_alloc vxcr_dma;
185 };
186 
187 struct vmxnet3_txq_stats {
188 	uint64_t vmtxs_csum;
189 	uint64_t vmtxs_tso;
190 	uint64_t vmtxs_full;
191 	uint64_t vmtxs_offload_failed;
192 };
193 
194 struct vmxnet3_txqueue {
195 	kmutex_t *vxtxq_mtx;
196 	struct vmxnet3_softc *vxtxq_sc;
197 	int vxtxq_watchdog;
198 	pcq_t *vxtxq_interq;
199 	struct vmxnet3_txring vxtxq_cmd_ring;
200 	struct vmxnet3_comp_ring vxtxq_comp_ring;
201 	struct vmxnet3_txq_stats vxtxq_stats;
202 	struct vmxnet3_txq_shared *vxtxq_ts;
203 	char vxtxq_name[16];
204 
205 	void *vxtxq_si;
206 
207 	struct evcnt vxtxq_intr;
208 	struct evcnt vxtxq_defer;
209 	struct evcnt vxtxq_deferreq;
210 	struct evcnt vxtxq_pcqdrop;
211 	struct evcnt vxtxq_transmitdef;
212 	struct evcnt vxtxq_watchdogto;
213 	struct evcnt vxtxq_defragged;
214 	struct evcnt vxtxq_defrag_failed;
215 
216 	bool vxtxq_stopping;
217 };
218 
219 
220 struct vmxnet3_rxqueue {
221 	kmutex_t *vxrxq_mtx;
222 	struct vmxnet3_softc *vxrxq_sc;
223 	struct mbuf *vxrxq_mhead;
224 	struct mbuf *vxrxq_mtail;
225 	struct vmxnet3_rxring vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
226 	struct vmxnet3_comp_ring vxrxq_comp_ring;
227 	struct vmxnet3_rxq_shared *vxrxq_rs;
228 	char vxrxq_name[16];
229 
230 	struct evcnt vxrxq_intr;
231 	struct evcnt vxrxq_defer;
232 	struct evcnt vxrxq_deferreq;
233 	struct evcnt vxrxq_mgetcl_failed;
234 	struct evcnt vxrxq_mbuf_load_failed;
235 
236 	bool vxrxq_stopping;
237 };
238 
239 struct vmxnet3_queue {
240 	int vxq_id;
241 	int vxq_intr_idx;
242 
243 	struct vmxnet3_txqueue vxq_txqueue;
244 	struct vmxnet3_rxqueue vxq_rxqueue;
245 
246 	void *vxq_si;
247 	bool vxq_workqueue;
248 	bool vxq_wq_enqueued;
249 	struct work vxq_wq_cookie;
250 };
251 
252 struct vmxnet3_softc {
253 	device_t vmx_dev;
254 	struct ethercom vmx_ethercom;
255 	struct ifmedia vmx_media;
256 	struct vmxnet3_driver_shared *vmx_ds;
257 	int vmx_flags;
258 #define VMXNET3_FLAG_NO_MSIX	(1 << 0)
259 #define VMXNET3_FLAG_RSS	(1 << 1)
260 #define VMXNET3_FLAG_ATTACHED	(1 << 2)
261 
262 	struct vmxnet3_queue *vmx_queue;
263 
264 	struct pci_attach_args *vmx_pa;
265 	pci_chipset_tag_t vmx_pc;
266 
267 	bus_space_tag_t vmx_iot0;
268 	bus_space_tag_t vmx_iot1;
269 	bus_space_handle_t vmx_ioh0;
270 	bus_space_handle_t vmx_ioh1;
271 	bus_size_t vmx_ios0;
272 	bus_size_t vmx_ios1;
273 	bus_dma_tag_t vmx_dmat;
274 
275 	int vmx_link_active;
276 	int vmx_ntxqueues;
277 	int vmx_nrxqueues;
278 	int vmx_ntxdescs;
279 	int vmx_nrxdescs;
280 	int vmx_max_rxsegs;
281 
282 	struct evcnt vmx_event_intr;
283 	struct evcnt vmx_event_link;
284 	struct evcnt vmx_event_txqerror;
285 	struct evcnt vmx_event_rxqerror;
286 	struct evcnt vmx_event_dic;
287 	struct evcnt vmx_event_debug;
288 
289 	int vmx_intr_type;
290 	int vmx_intr_mask_mode;
291 	int vmx_event_intr_idx;
292 	int vmx_nintrs;
293 	pci_intr_handle_t *vmx_intrs;	/* legacy use vmx_intrs[0] */
294 	void *vmx_ihs[VMXNET3_MAX_INTRS];
295 
296 	kmutex_t *vmx_mtx;
297 
298 	int vmx_if_flags;
299 	bool vmx_promisc;
300 	bool vmx_mcastactive;
301 	uint8_t *vmx_mcast;
302 	void *vmx_qs;
303 	struct vmxnet3_rss_shared *vmx_rss;
304 	callout_t vmx_tick;
305 	struct vmxnet3_dma_alloc vmx_ds_dma;
306 	struct vmxnet3_dma_alloc vmx_qs_dma;
307 	struct vmxnet3_dma_alloc vmx_mcast_dma;
308 	struct vmxnet3_dma_alloc vmx_rss_dma;
309 	int vmx_max_ntxqueues;
310 	int vmx_max_nrxqueues;
311 	uint8_t vmx_lladdr[ETHER_ADDR_LEN];
312 
313 	u_int vmx_rx_intr_process_limit;
314 	u_int vmx_tx_intr_process_limit;
315 	u_int vmx_rx_process_limit;
316 	u_int vmx_tx_process_limit;
317 	struct sysctllog *vmx_sysctllog;
318 
319 	bool vmx_txrx_workqueue;
320 	struct workqueue *vmx_queue_wq;
321 
322 	struct workqueue *vmx_reset_wq;
323 	struct work vmx_reset_work;
324 	bool vmx_reset_pending;
325 };
326 
327 #define VMXNET3_STAT
328 
329 #ifdef VMXNET3_STAT
330 struct {
331 	u_int txhead;
332 	u_int txdone;
333 	u_int maxtxlen;
334 	u_int rxdone;
335 	u_int rxfill;
336 	u_int intr;
337 } vmxstat;
338 #endif
339 
340 typedef enum {
341 	VMXNET3_BARRIER_RD,
342 	VMXNET3_BARRIER_WR,
343 } vmxnet3_barrier_t;
344 
345 #define JUMBO_LEN (MCLBYTES - ETHER_ALIGN)	/* XXX */
346 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
347 
348 #define vtophys(va) 0		/* XXX ok? */
349 
350 static int vmxnet3_match(device_t, cfdata_t, void *);
351 static void vmxnet3_attach(device_t, device_t, void *);
352 static int vmxnet3_detach(device_t, int);
353 
354 static int vmxnet3_alloc_pci_resources(struct vmxnet3_softc *);
355 static void vmxnet3_free_pci_resources(struct vmxnet3_softc *);
356 static int vmxnet3_check_version(struct vmxnet3_softc *);
357 static void vmxnet3_check_multiqueue(struct vmxnet3_softc *);
358 
359 static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
360 static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
361 static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
362 static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
363 static void vmxnet3_free_interrupts(struct vmxnet3_softc *);
364 
365 static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
366 static int vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *);
367 static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
368 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
369 static int vmxnet3_setup_interrupts(struct vmxnet3_softc *);
370 static int vmxnet3_setup_sysctl(struct vmxnet3_softc *);
371 
372 static int vmxnet3_setup_stats(struct vmxnet3_softc *);
373 static void vmxnet3_teardown_stats(struct vmxnet3_softc *);
374 
375 static int vmxnet3_init_rxq(struct vmxnet3_softc *, int);
376 static int vmxnet3_init_txq(struct vmxnet3_softc *, int);
377 static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
378 static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
379 static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
380 static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
381 
382 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
383 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
384 static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
385 static void vmxnet3_free_txq_data(struct vmxnet3_softc *);
386 static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
387 static void vmxnet3_free_rxq_data(struct vmxnet3_softc *);
388 static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
389 static void vmxnet3_free_queue_data(struct vmxnet3_softc *);
390 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
391 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
392 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
393 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
394 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
395 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
396 static void vmxnet3_free_data(struct vmxnet3_softc *);
397 static int vmxnet3_setup_interface(struct vmxnet3_softc *);
398 
399 static void vmxnet3_evintr(struct vmxnet3_softc *);
400 static bool vmxnet3_txq_eof(struct vmxnet3_txqueue *, u_int);
401 static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxqueue *,
402     struct vmxnet3_rxring *);
403 static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
404     struct vmxnet3_rxring *, int);
405 static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *);
406 static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
407 static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *,
408     struct vmxnet3_rxcompdesc *, struct mbuf *);
409 static bool vmxnet3_rxq_eof(struct vmxnet3_rxqueue *, u_int);
410 static int vmxnet3_legacy_intr(void *);
411 static int vmxnet3_txrxq_intr(void *);
412 static void vmxnet3_handle_queue(void *);
413 static void vmxnet3_handle_queue_work(struct work *, void *);
414 static int vmxnet3_event_intr(void *);
415 
416 static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
417 static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
418 static void vmxnet3_stop_locked(struct vmxnet3_softc *);
419 static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *);
420 static void vmxnet3_stop(struct ifnet *, int);
421 
422 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
423 static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
424 static int vmxnet3_reinit_queues(struct vmxnet3_softc *);
425 static int vmxnet3_enable_device(struct vmxnet3_softc *);
426 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
427 static int vmxnet3_reinit(struct vmxnet3_softc *);
428 
429 static int vmxnet3_init_locked(struct vmxnet3_softc *);
430 static int vmxnet3_init(struct ifnet *);
431 
432 static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *, int *, int *);
433 static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t);
434 static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
435 static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
436 static void vmxnet3_start_locked(struct ifnet *);
437 static void vmxnet3_start(struct ifnet *);
438 static void vmxnet3_transmit_locked(struct ifnet *, struct vmxnet3_txqueue *);
439 static int vmxnet3_transmit(struct ifnet *, struct mbuf *);
440 static void vmxnet3_deferred_transmit(void *);
441 
442 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *);
443 static int vmxnet3_ioctl(struct ifnet *, u_long, void *);
444 static int vmxnet3_ifflags_cb(struct ethercom *);
445 
446 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
447 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
448 static void vmxnet3_tick(void *);
449 static void vmxnet3_reset_work(struct work *, void *);
450 static void vmxnet3_if_link_status(struct vmxnet3_softc *);
451 static bool vmxnet3_cmd_link_status(struct ifnet *);
452 static void vmxnet3_ifmedia_status(struct ifnet *, struct ifmediareq *);
453 static int vmxnet3_ifmedia_change(struct ifnet *);
454 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
455 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
456 
457 static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
458 static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
459 
460 static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t,
461     struct vmxnet3_dma_alloc *);
462 static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *);
463 
464 CFATTACH_DECL3_NEW(vmx, sizeof(struct vmxnet3_softc),
465     vmxnet3_match, vmxnet3_attach, vmxnet3_detach, NULL, NULL, NULL, 0);
466 
467 /* round down to the nearest power of 2 */
468 static int
469 vmxnet3_calc_queue_size(int n)
470 {
471 
472 	if (__predict_false(n <= 0))
473 		return 1;
474 
475 	return (1U << (fls32(n) - 1));
476 }
477 
478 static inline void
479 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
480 {
481 
482 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
483 }
484 
485 static inline uint32_t
486 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
487 {
488 
489 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
490 }
491 
492 static inline void
493 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
494 {
495 
496 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
497 }
498 
499 static inline void
500 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
501 {
502 
503 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
504 }
505 
506 static inline uint32_t
507 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
508 {
509 
510 	vmxnet3_write_cmd(sc, cmd);
511 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
512 }
513 
514 static inline void
515 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
516 {
517 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
518 }
519 
520 static inline void
521 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
522 {
523 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
524 }
525 
526 static inline void
527 vmxnet3_rxr_increment_fill(struct vmxnet3_rxring *rxr)
528 {
529 
530 	if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
531 		rxr->vxrxr_fill = 0;
532 		rxr->vxrxr_gen ^= 1;
533 	}
534 }
535 
536 static inline int
537 vmxnet3_txring_avail(struct vmxnet3_txring *txr)
538 {
539 	int avail = txr->vxtxr_next - txr->vxtxr_head - 1;
540 	return (avail < 0 ? (int)txr->vxtxr_ndesc + avail : avail);
541 }
542 
543 /*
544  * Since this is a purely paravirtualized device, we do not have
545  * to worry about DMA coherency. But at times, we must make sure
546  * both the compiler and CPU do not reorder memory operations.
547  */
548 static inline void
549 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
550 {
551 
552 	switch (type) {
553 	case VMXNET3_BARRIER_RD:
554 		membar_consumer();
555 		break;
556 	case VMXNET3_BARRIER_WR:
557 		membar_producer();
558 		break;
559 	default:
560 		panic("%s: bad barrier type %d", __func__, type);
561 	}
562 }
563 
564 static int
565 vmxnet3_match(device_t parent, cfdata_t match, void *aux)
566 {
567 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
568 
569 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_VMWARE &&
570 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VMWARE_VMXNET3)
571 		return 1;
572 
573 	return 0;
574 }
575 
576 static void
577 vmxnet3_attach(device_t parent, device_t self, void *aux)
578 {
579 	struct vmxnet3_softc *sc = device_private(self);
580 	struct pci_attach_args *pa = aux;
581 	pcireg_t preg;
582 	int error;
583 	int candidate;
584 
585 	sc->vmx_dev = self;
586 	sc->vmx_pa = pa;
587 	sc->vmx_pc = pa->pa_pc;
588 	if (pci_dma64_available(pa))
589 		sc->vmx_dmat = pa->pa_dmat64;
590 	else
591 		sc->vmx_dmat = pa->pa_dmat;
592 
593 	pci_aprint_devinfo_fancy(pa, "Ethernet controller", "vmxnet3", 1);
594 
595 	preg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
596 	preg |= PCI_COMMAND_MASTER_ENABLE;
597 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, preg);
598 
599 	sc->vmx_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
600 	callout_init(&sc->vmx_tick, CALLOUT_MPSAFE);
601 
602 	candidate = MIN(MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES),
603 	    ncpu);
604 	sc->vmx_max_ntxqueues = sc->vmx_max_nrxqueues =
605 	    vmxnet3_calc_queue_size(candidate);
606 	sc->vmx_ntxdescs = 512;
607 	sc->vmx_nrxdescs = 256;
608 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
609 
610 	error = vmxnet3_alloc_pci_resources(sc);
611 	if (error)
612 		return;
613 
614 	error = vmxnet3_check_version(sc);
615 	if (error)
616 		return;
617 
618 	error = vmxnet3_alloc_rxtx_queues(sc);
619 	if (error)
620 		return;
621 
622 	error = vmxnet3_alloc_interrupts(sc);
623 	if (error)
624 		return;
625 
626 	vmxnet3_check_multiqueue(sc);
627 
628 	error = vmxnet3_alloc_data(sc);
629 	if (error)
630 		return;
631 
632 	error = vmxnet3_setup_interface(sc);
633 	if (error)
634 		return;
635 
636 	error = vmxnet3_setup_interrupts(sc);
637 	if (error)
638 		return;
639 
640 	error = vmxnet3_setup_sysctl(sc);
641 	if (error)
642 		return;
643 
644 	error = vmxnet3_setup_stats(sc);
645 	if (error)
646 		return;
647 
648 	char buf[128];
649 	snprintf(buf, sizeof(buf), "%s_reset", device_xname(sc->vmx_dev));
650 	error = workqueue_create(&sc->vmx_reset_wq, "%s_reset",
651 	    vmxnet3_reset_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_SOFTCLOCK,
652 	    WQ_MPSAFE);
653 	if (error) {
654 		aprint_error_dev(sc->vmx_dev,
655 		    "failed to create reset workqueue: %d\n",
656 		    error);
657 		return;
658 	}
659 
660 	sc->vmx_flags |= VMXNET3_FLAG_ATTACHED;
661 }
662 
663 static int
664 vmxnet3_detach(device_t self, int flags)
665 {
666 	struct vmxnet3_softc *sc;
667 	struct ifnet *ifp;
668 
669 	sc = device_private(self);
670 	ifp = &sc->vmx_ethercom.ec_if;
671 
672 	if (sc->vmx_flags & VMXNET3_FLAG_ATTACHED) {
673 		VMXNET3_CORE_LOCK(sc);
674 		vmxnet3_stop_locked(sc);
675 		callout_halt(&sc->vmx_tick, sc->vmx_mtx);
676 		callout_destroy(&sc->vmx_tick);
677 		VMXNET3_CORE_UNLOCK(sc);
678 
679 		ether_ifdetach(ifp);
680 		if_detach(ifp);
681 		ifmedia_fini(&sc->vmx_media);
682 	}
683 
684 	vmxnet3_teardown_stats(sc);
685 	sysctl_teardown(&sc->vmx_sysctllog);
686 
687 	vmxnet3_free_interrupts(sc);
688 
689 	vmxnet3_free_data(sc);
690 	vmxnet3_free_pci_resources(sc);
691 	vmxnet3_free_rxtx_queues(sc);
692 
693 	if (sc->vmx_mtx)
694 		mutex_obj_free(sc->vmx_mtx);
695 
696 	return (0);
697 }
698 
699 static int
700 vmxnet3_alloc_pci_resources(struct vmxnet3_softc *sc)
701 {
702 	struct pci_attach_args *pa = sc->vmx_pa;
703 	pcireg_t memtype;
704 
705 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
706 	if (pci_mapreg_map(pa, PCI_BAR(0), memtype, 0, &sc->vmx_iot0, &sc->vmx_ioh0,
707 	    NULL, &sc->vmx_ios0)) {
708 		aprint_error_dev(sc->vmx_dev, "failed to map BAR0\n");
709 		return (ENXIO);
710 	}
711 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(1));
712 	if (pci_mapreg_map(pa, PCI_BAR(1), memtype, 0, &sc->vmx_iot1, &sc->vmx_ioh1,
713 	    NULL, &sc->vmx_ios1)) {
714 		aprint_error_dev(sc->vmx_dev, "failed to map BAR1\n");
715 		return (ENXIO);
716 	}
717 
718 	if (!pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, NULL, NULL)) {
719 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
720 		return (0);
721 	}
722 
723 	return (0);
724 }
725 
726 static void
727 vmxnet3_free_pci_resources(struct vmxnet3_softc *sc)
728 {
729 
730 	if (sc->vmx_ios0) {
731 		bus_space_unmap(sc->vmx_iot0, sc->vmx_ioh0, sc->vmx_ios0);
732 		sc->vmx_ios0 = 0;
733 	}
734 
735 	if (sc->vmx_ios1) {
736 		bus_space_unmap(sc->vmx_iot1, sc->vmx_ioh1, sc->vmx_ios1);
737 		sc->vmx_ios1 = 0;
738 	}
739 }
740 
741 static int
742 vmxnet3_check_version(struct vmxnet3_softc *sc)
743 {
744 	u_int ver;
745 
746 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
747 	if ((ver & 0x1) == 0) {
748 		aprint_error_dev(sc->vmx_dev,
749 		    "unsupported hardware version 0x%x\n", ver);
750 		return (ENOTSUP);
751 	}
752 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
753 
754 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
755 	if ((ver & 0x1) == 0) {
756 		aprint_error_dev(sc->vmx_dev,
757 		    "incompatiable UPT version 0x%x\n", ver);
758 		return (ENOTSUP);
759 	}
760 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
761 
762 	return (0);
763 }
764 
765 static void
766 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
767 {
768 
769 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
770 		goto out;
771 
772 	/* Just use the maximum configured for now. */
773 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
774 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
775 
776 	if (sc->vmx_nrxqueues > 1)
777 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
778 
779 	return;
780 
781 out:
782 	sc->vmx_ntxqueues = 1;
783 	sc->vmx_nrxqueues = 1;
784 }
785 
786 static int
787 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
788 {
789 	int required;
790 	struct pci_attach_args *pa = sc->vmx_pa;
791 
792 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
793 		return (1);
794 
795 	/* Allocate an additional vector for the events interrupt. */
796 	required = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues) + 1;
797 
798 	if (pci_msix_count(pa->pa_pc, pa->pa_tag) < required)
799 		return (1);
800 
801 	if (pci_msix_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
802 		sc->vmx_nintrs = required;
803 		return (0);
804 	}
805 
806 	return (1);
807 }
808 
809 static int
810 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
811 {
812 	int nmsi, required;
813 	struct pci_attach_args *pa = sc->vmx_pa;
814 
815 	required = 1;
816 
817 	nmsi = pci_msi_count(pa->pa_pc, pa->pa_tag);
818 	if (nmsi < required)
819 		return (1);
820 
821 	if (pci_msi_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
822 		sc->vmx_nintrs = required;
823 		return (0);
824 	}
825 
826 	return (1);
827 }
828 
829 static int
830 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
831 {
832 
833 	if (pci_intx_alloc(sc->vmx_pa, &sc->vmx_intrs) == 0) {
834 		sc->vmx_nintrs = 1;
835 		return (0);
836 	}
837 
838 	return (1);
839 }
840 
841 static int
842 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
843 {
844 	u_int config;
845 	int error;
846 
847 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
848 
849 	sc->vmx_intr_type = config & 0x03;
850 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
851 
852 	switch (sc->vmx_intr_type) {
853 	case VMXNET3_IT_AUTO:
854 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
855 		/* FALLTHROUGH */
856 	case VMXNET3_IT_MSIX:
857 		error = vmxnet3_alloc_msix_interrupts(sc);
858 		if (error == 0)
859 			break;
860 		sc->vmx_intr_type = VMXNET3_IT_MSI;
861 		/* FALLTHROUGH */
862 	case VMXNET3_IT_MSI:
863 		error = vmxnet3_alloc_msi_interrupts(sc);
864 		if (error == 0)
865 			break;
866 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
867 		/* FALLTHROUGH */
868 	case VMXNET3_IT_LEGACY:
869 		error = vmxnet3_alloc_legacy_interrupts(sc);
870 		if (error == 0)
871 			break;
872 		/* FALLTHROUGH */
873 	default:
874 		sc->vmx_intr_type = -1;
875 		aprint_error_dev(sc->vmx_dev, "cannot allocate any interrupt resources\n");
876 		return (ENXIO);
877 	}
878 
879 	return (error);
880 }
881 
882 static void
883 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
884 {
885 	pci_chipset_tag_t pc = sc->vmx_pc;
886 	int i;
887 
888 	workqueue_destroy(sc->vmx_queue_wq);
889 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
890 		struct vmxnet3_queue *vmxq =  &sc->vmx_queue[i];
891 
892 		softint_disestablish(vmxq->vxq_si);
893 		vmxq->vxq_si = NULL;
894 	}
895 	for (i = 0; i < sc->vmx_nintrs; i++) {
896 		pci_intr_disestablish(pc, sc->vmx_ihs[i]);
897 	}
898 	pci_intr_release(pc, sc->vmx_intrs, sc->vmx_nintrs);
899 }
900 
901 static int
902 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
903 {
904 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
905 	struct vmxnet3_queue *vmxq;
906 	pci_intr_handle_t *intr;
907 	void **ihs;
908 	int intr_idx, i, use_queues, error;
909 	kcpuset_t *affinity;
910 	const char *intrstr;
911 	char intrbuf[PCI_INTRSTR_LEN];
912 	char xnamebuf[32];
913 
914 	intr = sc->vmx_intrs;
915 	intr_idx = 0;
916 	ihs = sc->vmx_ihs;
917 
918 	/* See vmxnet3_alloc_msix_interrupts() */
919 	use_queues = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
920 	for (i = 0; i < use_queues; i++, intr++, ihs++, intr_idx++) {
921 		snprintf(xnamebuf, 32, "%s: txrx %d", device_xname(sc->vmx_dev), i);
922 
923 		vmxq = &sc->vmx_queue[i];
924 
925 		intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
926 
927 		pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
928 		*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
929 		    vmxnet3_txrxq_intr, vmxq, xnamebuf);
930 		if (*ihs == NULL) {
931 			aprint_error_dev(sc->vmx_dev,
932 			    "unable to establish txrx interrupt at %s\n", intrstr);
933 			return (-1);
934 		}
935 		aprint_normal_dev(sc->vmx_dev, "txrx interrupting at %s\n", intrstr);
936 
937 		kcpuset_create(&affinity, true);
938 		kcpuset_set(affinity, intr_idx % ncpu);
939 		error = interrupt_distribute(*ihs, affinity, NULL);
940 		if (error) {
941 			aprint_normal_dev(sc->vmx_dev,
942 			    "%s cannot be changed affinity, use default CPU\n",
943 			    intrstr);
944 		}
945 		kcpuset_destroy(affinity);
946 
947 		vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
948 		    vmxnet3_handle_queue, vmxq);
949 		if (vmxq->vxq_si == NULL) {
950 			aprint_error_dev(sc->vmx_dev,
951 			    "softint_establish for vxq_si failed\n");
952 			return (-1);
953 		}
954 
955 		vmxq->vxq_intr_idx = intr_idx;
956 	}
957 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(sc->vmx_dev));
958 	error = workqueue_create(&sc->vmx_queue_wq, xnamebuf,
959 	    vmxnet3_handle_queue_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_NET,
960 	    WQ_PERCPU | WQ_MPSAFE);
961 	if (error) {
962 		aprint_error_dev(sc->vmx_dev, "workqueue_create failed\n");
963 		return (-1);
964 	}
965 	sc->vmx_txrx_workqueue = false;
966 
967 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
968 
969 	snprintf(xnamebuf, 32, "%s: link", device_xname(sc->vmx_dev));
970 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
971 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
972 	    vmxnet3_event_intr, sc, xnamebuf);
973 	if (*ihs == NULL) {
974 		aprint_error_dev(sc->vmx_dev,
975 		    "unable to establish event interrupt at %s\n", intrstr);
976 		return (-1);
977 	}
978 	aprint_normal_dev(sc->vmx_dev, "event interrupting at %s\n", intrstr);
979 
980 	sc->vmx_event_intr_idx = intr_idx;
981 
982 	return (0);
983 }
984 
985 static int
986 vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *sc)
987 {
988 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
989 	pci_intr_handle_t *intr;
990 	void **ihs;
991 	struct vmxnet3_queue *vmxq;
992 	int i;
993 	const char *intrstr;
994 	char intrbuf[PCI_INTRSTR_LEN];
995 	char xnamebuf[32];
996 
997 	intr = &sc->vmx_intrs[0];
998 	ihs = sc->vmx_ihs;
999 	vmxq = &sc->vmx_queue[0];
1000 
1001 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1002 
1003 	snprintf(xnamebuf, 32, "%s: msi", device_xname(sc->vmx_dev));
1004 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1005 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1006 	    vmxnet3_legacy_intr, sc, xnamebuf);
1007 	if (*ihs == NULL) {
1008 		aprint_error_dev(sc->vmx_dev,
1009 		    "unable to establish interrupt at %s\n", intrstr);
1010 		return (-1);
1011 	}
1012 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1013 
1014 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1015 	    vmxnet3_handle_queue, vmxq);
1016 	if (vmxq->vxq_si == NULL) {
1017 		aprint_error_dev(sc->vmx_dev,
1018 		    "softint_establish for vxq_si failed\n");
1019 		return (-1);
1020 	}
1021 
1022 	for (i = 0; i < MIN(sc->vmx_nrxqueues, sc->vmx_nrxqueues); i++)
1023 		sc->vmx_queue[i].vxq_intr_idx = 0;
1024 	sc->vmx_event_intr_idx = 0;
1025 
1026 	return (0);
1027 }
1028 
1029 static int
1030 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
1031 {
1032 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
1033 	pci_intr_handle_t *intr;
1034 	void **ihs;
1035 	struct vmxnet3_queue *vmxq;
1036 	int i;
1037 	const char *intrstr;
1038 	char intrbuf[PCI_INTRSTR_LEN];
1039 	char xnamebuf[32];
1040 
1041 	intr = &sc->vmx_intrs[0];
1042 	ihs = sc->vmx_ihs;
1043 	vmxq = &sc->vmx_queue[0];
1044 
1045 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1046 
1047 	snprintf(xnamebuf, 32, "%s:legacy", device_xname(sc->vmx_dev));
1048 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1049 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1050 	    vmxnet3_legacy_intr, sc, xnamebuf);
1051 	if (*ihs == NULL) {
1052 		aprint_error_dev(sc->vmx_dev,
1053 		    "unable to establish interrupt at %s\n", intrstr);
1054 		return (-1);
1055 	}
1056 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1057 
1058 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1059 	    vmxnet3_handle_queue, vmxq);
1060 	if (vmxq->vxq_si == NULL) {
1061 		aprint_error_dev(sc->vmx_dev,
1062 		    "softint_establish for vxq_si failed\n");
1063 		return (-1);
1064 	}
1065 
1066 	for (i = 0; i < MIN(sc->vmx_nrxqueues, sc->vmx_nrxqueues); i++)
1067 		sc->vmx_queue[i].vxq_intr_idx = 0;
1068 	sc->vmx_event_intr_idx = 0;
1069 
1070 	return (0);
1071 }
1072 
1073 static void
1074 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
1075 {
1076 	struct vmxnet3_queue *vmxq;
1077 	struct vmxnet3_txqueue *txq;
1078 	struct vmxnet3_txq_shared *txs;
1079 	struct vmxnet3_rxqueue *rxq;
1080 	struct vmxnet3_rxq_shared *rxs;
1081 	int i;
1082 
1083 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
1084 
1085 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1086 		vmxq = &sc->vmx_queue[i];
1087 		txq = &vmxq->vxq_txqueue;
1088 		txs = txq->vxtxq_ts;
1089 		txs->intr_idx = vmxq->vxq_intr_idx;
1090 	}
1091 
1092 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1093 		vmxq = &sc->vmx_queue[i];
1094 		rxq = &vmxq->vxq_rxqueue;
1095 		rxs = rxq->vxrxq_rs;
1096 		rxs->intr_idx = vmxq->vxq_intr_idx;
1097 	}
1098 }
1099 
1100 static int
1101 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
1102 {
1103 	int error;
1104 
1105 	switch (sc->vmx_intr_type) {
1106 	case VMXNET3_IT_MSIX:
1107 		error = vmxnet3_setup_msix_interrupts(sc);
1108 		break;
1109 	case VMXNET3_IT_MSI:
1110 		error = vmxnet3_setup_msi_interrupt(sc);
1111 		break;
1112 	case VMXNET3_IT_LEGACY:
1113 		error = vmxnet3_setup_legacy_interrupt(sc);
1114 		break;
1115 	default:
1116 		panic("%s: invalid interrupt type %d", __func__,
1117 		    sc->vmx_intr_type);
1118 	}
1119 
1120 	if (error == 0)
1121 		vmxnet3_set_interrupt_idx(sc);
1122 
1123 	return (error);
1124 }
1125 
1126 static int
1127 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1128 {
1129 	struct vmxnet3_rxqueue *rxq;
1130 	struct vmxnet3_rxring *rxr;
1131 	int i;
1132 
1133 	rxq = &sc->vmx_queue[q].vxq_rxqueue;
1134 
1135 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1136 	    device_xname(sc->vmx_dev), q);
1137 	rxq->vxrxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1138 
1139 	rxq->vxrxq_sc = sc;
1140 
1141 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1142 		rxr = &rxq->vxrxq_cmd_ring[i];
1143 		rxr->vxrxr_rid = i;
1144 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1145 		rxr->vxrxr_rxbuf = kmem_zalloc(rxr->vxrxr_ndesc *
1146 		    sizeof(struct vmxnet3_rxbuf), KM_SLEEP);
1147 
1148 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1149 	}
1150 
1151 	rxq->vxrxq_stopping = true;
1152 
1153 	return (0);
1154 }
1155 
1156 static int
1157 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1158 {
1159 	struct vmxnet3_txqueue *txq;
1160 	struct vmxnet3_txring *txr;
1161 
1162 	txq = &sc->vmx_queue[q].vxq_txqueue;
1163 	txr = &txq->vxtxq_cmd_ring;
1164 
1165 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1166 	    device_xname(sc->vmx_dev), q);
1167 	txq->vxtxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1168 
1169 	txq->vxtxq_sc = sc;
1170 
1171 	txq->vxtxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1172 	    vmxnet3_deferred_transmit, txq);
1173 	if (txq->vxtxq_si == NULL) {
1174 		mutex_obj_free(txq->vxtxq_mtx);
1175 		aprint_error_dev(sc->vmx_dev,
1176 		    "softint_establish for vxtxq_si failed\n");
1177 		return ENOMEM;
1178 	}
1179 
1180 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1181 	txr->vxtxr_txbuf = kmem_zalloc(txr->vxtxr_ndesc *
1182 	    sizeof(struct vmxnet3_txbuf), KM_SLEEP);
1183 
1184 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1185 
1186 	txq->vxtxq_interq = pcq_create(sc->vmx_ntxdescs, KM_SLEEP);
1187 
1188 	txq->vxtxq_stopping = true;
1189 
1190 	return (0);
1191 }
1192 
1193 static int
1194 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1195 {
1196 	int i, error, max_nqueues;
1197 
1198 	KASSERT(!cpu_intr_p());
1199 	KASSERT(!cpu_softintr_p());
1200 
1201 	/*
1202 	 * Only attempt to create multiple queues if MSIX is available.
1203 	 * This check prevents us from allocating queue structures that
1204 	 * we will not use.
1205 	 *
1206 	 * FreeBSD:
1207 	 * MSIX is disabled by default because its apparently broken for
1208 	 * devices passed through by at least ESXi 5.1.
1209 	 * The hw.pci.honor_msi_blacklist tunable must be set to zero for MSIX.
1210 	 */
1211 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1212 		sc->vmx_max_nrxqueues = 1;
1213 		sc->vmx_max_ntxqueues = 1;
1214 	}
1215 
1216 	max_nqueues = MAX(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
1217 	sc->vmx_queue = kmem_zalloc(sizeof(struct vmxnet3_queue) * max_nqueues,
1218 	    KM_SLEEP);
1219 
1220 	for (i = 0; i < max_nqueues; i++) {
1221 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
1222 		vmxq->vxq_id = i;
1223 	}
1224 
1225 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1226 		error = vmxnet3_init_rxq(sc, i);
1227 		if (error)
1228 			return (error);
1229 	}
1230 
1231 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1232 		error = vmxnet3_init_txq(sc, i);
1233 		if (error)
1234 			return (error);
1235 	}
1236 
1237 	return (0);
1238 }
1239 
1240 static void
1241 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1242 {
1243 	struct vmxnet3_rxring *rxr;
1244 	int i;
1245 
1246 	rxq->vxrxq_sc = NULL;
1247 
1248 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1249 		rxr = &rxq->vxrxq_cmd_ring[i];
1250 
1251 		if (rxr->vxrxr_rxbuf != NULL) {
1252 			kmem_free(rxr->vxrxr_rxbuf,
1253 			    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf));
1254 			rxr->vxrxr_rxbuf = NULL;
1255 		}
1256 	}
1257 
1258 	if (rxq->vxrxq_mtx != NULL)
1259 		mutex_obj_free(rxq->vxrxq_mtx);
1260 }
1261 
1262 static void
1263 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1264 {
1265 	struct vmxnet3_txring *txr;
1266 	struct mbuf *m;
1267 
1268 	txr = &txq->vxtxq_cmd_ring;
1269 
1270 	txq->vxtxq_sc = NULL;
1271 
1272 	softint_disestablish(txq->vxtxq_si);
1273 
1274 	while ((m = pcq_get(txq->vxtxq_interq)) != NULL)
1275 		m_freem(m);
1276 	pcq_destroy(txq->vxtxq_interq);
1277 
1278 	if (txr->vxtxr_txbuf != NULL) {
1279 		kmem_free(txr->vxtxr_txbuf,
1280 		    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf));
1281 		txr->vxtxr_txbuf = NULL;
1282 	}
1283 
1284 	if (txq->vxtxq_mtx != NULL)
1285 		mutex_obj_free(txq->vxtxq_mtx);
1286 }
1287 
1288 static void
1289 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1290 {
1291 	int i;
1292 
1293 	if (sc->vmx_queue != NULL) {
1294 		int max_nqueues;
1295 
1296 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1297 			vmxnet3_destroy_rxq(&sc->vmx_queue[i].vxq_rxqueue);
1298 
1299 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1300 			vmxnet3_destroy_txq(&sc->vmx_queue[i].vxq_txqueue);
1301 
1302 		max_nqueues = MAX(sc->vmx_max_nrxqueues, sc->vmx_max_ntxqueues);
1303 		kmem_free(sc->vmx_queue,
1304 		    sizeof(struct vmxnet3_queue) * max_nqueues);
1305 	}
1306 }
1307 
1308 static int
1309 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1310 {
1311 	device_t dev;
1312 	uint8_t *kva;
1313 	size_t size;
1314 	int i, error;
1315 
1316 	dev = sc->vmx_dev;
1317 
1318 	size = sizeof(struct vmxnet3_driver_shared);
1319 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1320 	if (error) {
1321 		device_printf(dev, "cannot alloc shared memory\n");
1322 		return (error);
1323 	}
1324 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1325 
1326 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1327 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1328 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1329 	if (error) {
1330 		device_printf(dev, "cannot alloc queue shared memory\n");
1331 		return (error);
1332 	}
1333 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1334 	kva = sc->vmx_qs;
1335 
1336 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1337 		sc->vmx_queue[i].vxq_txqueue.vxtxq_ts =
1338 		    (struct vmxnet3_txq_shared *) kva;
1339 		kva += sizeof(struct vmxnet3_txq_shared);
1340 	}
1341 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1342 		sc->vmx_queue[i].vxq_rxqueue.vxrxq_rs =
1343 		    (struct vmxnet3_rxq_shared *) kva;
1344 		kva += sizeof(struct vmxnet3_rxq_shared);
1345 	}
1346 
1347 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1348 		size = sizeof(struct vmxnet3_rss_shared);
1349 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1350 		if (error) {
1351 			device_printf(dev, "cannot alloc rss shared memory\n");
1352 			return (error);
1353 		}
1354 		sc->vmx_rss =
1355 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1356 	}
1357 
1358 	return (0);
1359 }
1360 
1361 static void
1362 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1363 {
1364 
1365 	if (sc->vmx_rss != NULL) {
1366 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1367 		sc->vmx_rss = NULL;
1368 	}
1369 
1370 	if (sc->vmx_qs != NULL) {
1371 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1372 		sc->vmx_qs = NULL;
1373 	}
1374 
1375 	if (sc->vmx_ds != NULL) {
1376 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1377 		sc->vmx_ds = NULL;
1378 	}
1379 }
1380 
1381 static int
1382 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1383 {
1384 	device_t dev;
1385 	struct vmxnet3_txqueue *txq;
1386 	struct vmxnet3_txring *txr;
1387 	struct vmxnet3_comp_ring *txc;
1388 	size_t descsz, compsz;
1389 	u_int i;
1390 	int q, error;
1391 
1392 	dev = sc->vmx_dev;
1393 
1394 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1395 		txq = &sc->vmx_queue[q].vxq_txqueue;
1396 		txr = &txq->vxtxq_cmd_ring;
1397 		txc = &txq->vxtxq_comp_ring;
1398 
1399 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1400 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1401 
1402 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1403 		if (error) {
1404 			device_printf(dev, "cannot alloc Tx descriptors for "
1405 			    "queue %d error %d\n", q, error);
1406 			return (error);
1407 		}
1408 		txr->vxtxr_txd =
1409 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1410 
1411 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1412 		if (error) {
1413 			device_printf(dev, "cannot alloc Tx comp descriptors "
1414 			   "for queue %d error %d\n", q, error);
1415 			return (error);
1416 		}
1417 		txc->vxcr_u.txcd =
1418 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1419 
1420 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1421 			error = bus_dmamap_create(sc->vmx_dmat, VMXNET3_TX_MAXSIZE,
1422 			    VMXNET3_TX_MAXSEGS, VMXNET3_TX_MAXSEGSIZE, 0, BUS_DMA_NOWAIT,
1423 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1424 			if (error) {
1425 				device_printf(dev, "unable to create Tx buf "
1426 				    "dmamap for queue %d idx %d\n", q, i);
1427 				return (error);
1428 			}
1429 		}
1430 	}
1431 
1432 	return (0);
1433 }
1434 
1435 static void
1436 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1437 {
1438 	struct vmxnet3_txqueue *txq;
1439 	struct vmxnet3_txring *txr;
1440 	struct vmxnet3_comp_ring *txc;
1441 	struct vmxnet3_txbuf *txb;
1442 	u_int i;
1443 	int q;
1444 
1445 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1446 		txq = &sc->vmx_queue[q].vxq_txqueue;
1447 		txr = &txq->vxtxq_cmd_ring;
1448 		txc = &txq->vxtxq_comp_ring;
1449 
1450 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1451 			txb = &txr->vxtxr_txbuf[i];
1452 			if (txb->vtxb_dmamap != NULL) {
1453 				bus_dmamap_destroy(sc->vmx_dmat,
1454 				    txb->vtxb_dmamap);
1455 				txb->vtxb_dmamap = NULL;
1456 			}
1457 		}
1458 
1459 		if (txc->vxcr_u.txcd != NULL) {
1460 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1461 			txc->vxcr_u.txcd = NULL;
1462 		}
1463 
1464 		if (txr->vxtxr_txd != NULL) {
1465 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1466 			txr->vxtxr_txd = NULL;
1467 		}
1468 	}
1469 }
1470 
1471 static int
1472 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1473 {
1474 	device_t dev;
1475 	struct vmxnet3_rxqueue *rxq;
1476 	struct vmxnet3_rxring *rxr;
1477 	struct vmxnet3_comp_ring *rxc;
1478 	int descsz, compsz;
1479 	u_int i, j;
1480 	int q, error;
1481 
1482 	dev = sc->vmx_dev;
1483 
1484 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1485 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1486 		rxc = &rxq->vxrxq_comp_ring;
1487 		compsz = 0;
1488 
1489 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1490 			rxr = &rxq->vxrxq_cmd_ring[i];
1491 
1492 			descsz = rxr->vxrxr_ndesc *
1493 			    sizeof(struct vmxnet3_rxdesc);
1494 			compsz += rxr->vxrxr_ndesc *
1495 			    sizeof(struct vmxnet3_rxcompdesc);
1496 
1497 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1498 			    &rxr->vxrxr_dma);
1499 			if (error) {
1500 				device_printf(dev, "cannot allocate Rx "
1501 				    "descriptors for queue %d/%d error %d\n",
1502 				    i, q, error);
1503 				return (error);
1504 			}
1505 			rxr->vxrxr_rxd =
1506 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1507 		}
1508 
1509 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1510 		if (error) {
1511 			device_printf(dev, "cannot alloc Rx comp descriptors "
1512 			    "for queue %d error %d\n", q, error);
1513 			return (error);
1514 		}
1515 		rxc->vxcr_u.rxcd =
1516 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1517 
1518 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1519 			rxr = &rxq->vxrxq_cmd_ring[i];
1520 
1521 			error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1522 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1523 			    &rxr->vxrxr_spare_dmap);
1524 			if (error) {
1525 				device_printf(dev, "unable to create spare "
1526 				    "dmamap for queue %d/%d error %d\n",
1527 				    q, i, error);
1528 				return (error);
1529 			}
1530 
1531 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1532 				error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1533 				    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1534 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1535 				if (error) {
1536 					device_printf(dev, "unable to create "
1537 					    "dmamap for queue %d/%d slot %d "
1538 					    "error %d\n",
1539 					    q, i, j, error);
1540 					return (error);
1541 				}
1542 			}
1543 		}
1544 	}
1545 
1546 	return (0);
1547 }
1548 
1549 static void
1550 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1551 {
1552 	struct vmxnet3_rxqueue *rxq;
1553 	struct vmxnet3_rxring *rxr;
1554 	struct vmxnet3_comp_ring *rxc;
1555 	struct vmxnet3_rxbuf *rxb;
1556 	u_int i, j;
1557 	int q;
1558 
1559 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1560 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1561 		rxc = &rxq->vxrxq_comp_ring;
1562 
1563 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1564 			rxr = &rxq->vxrxq_cmd_ring[i];
1565 
1566 			if (rxr->vxrxr_spare_dmap != NULL) {
1567 				bus_dmamap_destroy(sc->vmx_dmat,
1568 				    rxr->vxrxr_spare_dmap);
1569 				rxr->vxrxr_spare_dmap = NULL;
1570 			}
1571 
1572 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1573 				rxb = &rxr->vxrxr_rxbuf[j];
1574 				if (rxb->vrxb_dmamap != NULL) {
1575 					bus_dmamap_destroy(sc->vmx_dmat,
1576 					    rxb->vrxb_dmamap);
1577 					rxb->vrxb_dmamap = NULL;
1578 				}
1579 			}
1580 		}
1581 
1582 		if (rxc->vxcr_u.rxcd != NULL) {
1583 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1584 			rxc->vxcr_u.rxcd = NULL;
1585 		}
1586 
1587 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1588 			rxr = &rxq->vxrxq_cmd_ring[i];
1589 
1590 			if (rxr->vxrxr_rxd != NULL) {
1591 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1592 				rxr->vxrxr_rxd = NULL;
1593 			}
1594 		}
1595 	}
1596 }
1597 
1598 static int
1599 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1600 {
1601 	int error;
1602 
1603 	error = vmxnet3_alloc_txq_data(sc);
1604 	if (error)
1605 		return (error);
1606 
1607 	error = vmxnet3_alloc_rxq_data(sc);
1608 	if (error)
1609 		return (error);
1610 
1611 	return (0);
1612 }
1613 
1614 static void
1615 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1616 {
1617 
1618 	if (sc->vmx_queue != NULL) {
1619 		vmxnet3_free_rxq_data(sc);
1620 		vmxnet3_free_txq_data(sc);
1621 	}
1622 }
1623 
1624 static int
1625 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1626 {
1627 	int error;
1628 
1629 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1630 	    32, &sc->vmx_mcast_dma);
1631 	if (error)
1632 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1633 	else
1634 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1635 
1636 	return (error);
1637 }
1638 
1639 static void
1640 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1641 {
1642 
1643 	if (sc->vmx_mcast != NULL) {
1644 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1645 		sc->vmx_mcast = NULL;
1646 	}
1647 }
1648 
1649 static void
1650 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1651 {
1652 	struct vmxnet3_driver_shared *ds;
1653 	struct vmxnet3_txqueue *txq;
1654 	struct vmxnet3_txq_shared *txs;
1655 	struct vmxnet3_rxqueue *rxq;
1656 	struct vmxnet3_rxq_shared *rxs;
1657 	int i;
1658 
1659 	ds = sc->vmx_ds;
1660 
1661 	/*
1662 	 * Initialize fields of the shared data that remains the same across
1663 	 * reinits. Note the shared data is zero'd when allocated.
1664 	 */
1665 
1666 	ds->magic = VMXNET3_REV1_MAGIC;
1667 
1668 	/* DriverInfo */
1669 	ds->version = VMXNET3_DRIVER_VERSION;
1670 	ds->guest = VMXNET3_GOS_FREEBSD |
1671 #ifdef __LP64__
1672 	    VMXNET3_GOS_64BIT;
1673 #else
1674 	    VMXNET3_GOS_32BIT;
1675 #endif
1676 	ds->vmxnet3_revision = 1;
1677 	ds->upt_version = 1;
1678 
1679 	/* Misc. conf */
1680 	ds->driver_data = vtophys(sc);
1681 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1682 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1683 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1684 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1685 
1686 	/* RSS conf */
1687 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1688 		ds->rss.version = 1;
1689 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1690 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1691 	}
1692 
1693 	/* Interrupt control. */
1694 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1695 	ds->nintr = sc->vmx_nintrs;
1696 	ds->evintr = sc->vmx_event_intr_idx;
1697 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1698 
1699 	for (i = 0; i < sc->vmx_nintrs; i++)
1700 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1701 
1702 	/* Receive filter. */
1703 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1704 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1705 
1706 	/* Tx queues */
1707 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1708 		txq = &sc->vmx_queue[i].vxq_txqueue;
1709 		txs = txq->vxtxq_ts;
1710 
1711 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1712 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1713 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1714 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1715 		txs->driver_data = vtophys(txq);
1716 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1717 	}
1718 
1719 	/* Rx queues */
1720 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1721 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
1722 		rxs = rxq->vxrxq_rs;
1723 
1724 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1725 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1726 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1727 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1728 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1729 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1730 		rxs->driver_data = vtophys(rxq);
1731 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1732 	}
1733 }
1734 
1735 static void
1736 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1737 {
1738 	/*
1739 	 * Use the same key as the Linux driver until FreeBSD can do
1740 	 * RSS (presumably Toeplitz) in software.
1741 	 */
1742 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1743 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1744 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1745 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1746 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1747 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1748 	};
1749 
1750 	struct vmxnet3_rss_shared *rss;
1751 	int i;
1752 
1753 	rss = sc->vmx_rss;
1754 
1755 	rss->hash_type =
1756 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1757 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1758 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1759 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1760 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1761 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1762 
1763 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1764 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1765 }
1766 
1767 static void
1768 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1769 {
1770 	struct ifnet *ifp;
1771 	struct vmxnet3_driver_shared *ds;
1772 
1773 	ifp = &sc->vmx_ethercom.ec_if;
1774 	ds = sc->vmx_ds;
1775 
1776 	ds->mtu = ifp->if_mtu;
1777 	ds->ntxqueue = sc->vmx_ntxqueues;
1778 	ds->nrxqueue = sc->vmx_nrxqueues;
1779 
1780 	ds->upt_features = 0;
1781 	if (ifp->if_capenable &
1782 	    (IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
1783 	    IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
1784 		ds->upt_features |= UPT1_F_CSUM;
1785 	if (sc->vmx_ethercom.ec_capenable & ETHERCAP_VLAN_HWTAGGING)
1786 		ds->upt_features |= UPT1_F_VLAN;
1787 
1788 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1789 		ds->upt_features |= UPT1_F_RSS;
1790 		vmxnet3_reinit_rss_shared_data(sc);
1791 	}
1792 
1793 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1794 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1795 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1796 }
1797 
1798 static int
1799 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1800 {
1801 	int error;
1802 
1803 	error = vmxnet3_alloc_shared_data(sc);
1804 	if (error)
1805 		return (error);
1806 
1807 	error = vmxnet3_alloc_queue_data(sc);
1808 	if (error)
1809 		return (error);
1810 
1811 	error = vmxnet3_alloc_mcast_table(sc);
1812 	if (error)
1813 		return (error);
1814 
1815 	vmxnet3_init_shared_data(sc);
1816 
1817 	return (0);
1818 }
1819 
1820 static void
1821 vmxnet3_free_data(struct vmxnet3_softc *sc)
1822 {
1823 
1824 	vmxnet3_free_mcast_table(sc);
1825 	vmxnet3_free_queue_data(sc);
1826 	vmxnet3_free_shared_data(sc);
1827 }
1828 
1829 static int
1830 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1831 {
1832 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
1833 
1834 	vmxnet3_get_lladdr(sc);
1835 	aprint_normal_dev(sc->vmx_dev, "Ethernet address %s\n",
1836 	    ether_sprintf(sc->vmx_lladdr));
1837 	vmxnet3_set_lladdr(sc);
1838 
1839 	strlcpy(ifp->if_xname, device_xname(sc->vmx_dev), IFNAMSIZ);
1840 	ifp->if_softc = sc;
1841 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
1842 	ifp->if_extflags = IFEF_MPSAFE;
1843 	ifp->if_ioctl = vmxnet3_ioctl;
1844 	ifp->if_start = vmxnet3_start;
1845 	ifp->if_transmit = vmxnet3_transmit;
1846 	ifp->if_watchdog = NULL;
1847 	ifp->if_init = vmxnet3_init;
1848 	ifp->if_stop = vmxnet3_stop;
1849 	sc->vmx_ethercom.ec_if.if_capabilities |=IFCAP_CSUM_IPv4_Rx |
1850 		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1851 		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1852 		    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1853 		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1854 
1855 	ifp->if_capenable = ifp->if_capabilities;
1856 
1857 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1858 
1859 	sc->vmx_ethercom.ec_capabilities |=
1860 	    ETHERCAP_VLAN_MTU | ETHERCAP_VLAN_HWTAGGING | ETHERCAP_JUMBO_MTU;
1861 	sc->vmx_ethercom.ec_capenable |= ETHERCAP_VLAN_HWTAGGING;
1862 
1863 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs);
1864 	IFQ_SET_READY(&ifp->if_snd);
1865 
1866 	/* Initialize ifmedia structures. */
1867 	sc->vmx_ethercom.ec_ifmedia = &sc->vmx_media;
1868 	ifmedia_init_with_lock(&sc->vmx_media, IFM_IMASK, vmxnet3_ifmedia_change,
1869 	    vmxnet3_ifmedia_status, sc->vmx_mtx);
1870 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1871 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
1872 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T, 0, NULL);
1873 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1874 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T, 0, NULL);
1875 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1876 
1877 	if_attach(ifp);
1878 	if_deferred_start_init(ifp, NULL);
1879 	ether_ifattach(ifp, sc->vmx_lladdr);
1880 	ether_set_ifflags_cb(&sc->vmx_ethercom, vmxnet3_ifflags_cb);
1881 	vmxnet3_cmd_link_status(ifp);
1882 
1883 	/* should set before setting interrupts */
1884 	sc->vmx_rx_intr_process_limit = VMXNET3_RX_INTR_PROCESS_LIMIT;
1885 	sc->vmx_rx_process_limit = VMXNET3_RX_PROCESS_LIMIT;
1886 	sc->vmx_tx_intr_process_limit = VMXNET3_TX_INTR_PROCESS_LIMIT;
1887 	sc->vmx_tx_process_limit = VMXNET3_TX_PROCESS_LIMIT;
1888 
1889 	return (0);
1890 }
1891 
1892 static int
1893 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
1894 {
1895 	const char *devname;
1896 	struct sysctllog **log;
1897 	const struct sysctlnode *rnode, *rxnode, *txnode;
1898 	int error;
1899 
1900 	log = &sc->vmx_sysctllog;
1901 	devname = device_xname(sc->vmx_dev);
1902 
1903 	error = sysctl_createv(log, 0, NULL, &rnode,
1904 	    0, CTLTYPE_NODE, devname,
1905 	    SYSCTL_DESCR("vmxnet3 information and settings"),
1906 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
1907 	if (error)
1908 		goto out;
1909 	error = sysctl_createv(log, 0, &rnode, NULL,
1910 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
1911 	    SYSCTL_DESCR("Use workqueue for packet processing"),
1912 	    NULL, 0, &sc->vmx_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
1913 	if (error)
1914 		goto out;
1915 
1916 	error = sysctl_createv(log, 0, &rnode, &rxnode,
1917 	    0, CTLTYPE_NODE, "rx",
1918 	    SYSCTL_DESCR("vmxnet3 information and settings for Rx"),
1919 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1920 	if (error)
1921 		goto out;
1922 	error = sysctl_createv(log, 0, &rxnode, NULL,
1923 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1924 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
1925 	    NULL, 0, &sc->vmx_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1926 	if (error)
1927 		goto out;
1928 	error = sysctl_createv(log, 0, &rxnode, NULL,
1929 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1930 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1931 	    NULL, 0, &sc->vmx_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1932 	if (error)
1933 		goto out;
1934 
1935 	error = sysctl_createv(log, 0, &rnode, &txnode,
1936 	    0, CTLTYPE_NODE, "tx",
1937 	    SYSCTL_DESCR("vmxnet3 information and settings for Tx"),
1938 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1939 	if (error)
1940 		goto out;
1941 	error = sysctl_createv(log, 0, &txnode, NULL,
1942 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1943 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1944 	    NULL, 0, &sc->vmx_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1945 	if (error)
1946 		goto out;
1947 	error = sysctl_createv(log, 0, &txnode, NULL,
1948 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1949 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1950 	    NULL, 0, &sc->vmx_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1951 
1952 out:
1953 	if (error) {
1954 		aprint_error_dev(sc->vmx_dev,
1955 		    "unable to create sysctl node\n");
1956 		sysctl_teardown(log);
1957 	}
1958 	return error;
1959 }
1960 
1961 static int
1962 vmxnet3_setup_stats(struct vmxnet3_softc *sc)
1963 {
1964 	struct vmxnet3_queue *vmxq;
1965 	struct vmxnet3_txqueue *txq;
1966 	struct vmxnet3_rxqueue *rxq;
1967 	int i;
1968 
1969 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1970 		vmxq = &sc->vmx_queue[i];
1971 		txq = &vmxq->vxq_txqueue;
1972 		evcnt_attach_dynamic(&txq->vxtxq_intr, EVCNT_TYPE_INTR,
1973 		    NULL, txq->vxtxq_name, "Interrupt on queue");
1974 		evcnt_attach_dynamic(&txq->vxtxq_defer, EVCNT_TYPE_MISC,
1975 		    NULL, txq->vxtxq_name, "Handled queue in softint/workqueue");
1976 		evcnt_attach_dynamic(&txq->vxtxq_deferreq, EVCNT_TYPE_MISC,
1977 		    NULL, txq->vxtxq_name, "Requested in softint/workqueue");
1978 		evcnt_attach_dynamic(&txq->vxtxq_pcqdrop, EVCNT_TYPE_MISC,
1979 		    NULL, txq->vxtxq_name, "Dropped in pcq");
1980 		evcnt_attach_dynamic(&txq->vxtxq_transmitdef, EVCNT_TYPE_MISC,
1981 		    NULL, txq->vxtxq_name, "Deferred transmit");
1982 		evcnt_attach_dynamic(&txq->vxtxq_watchdogto, EVCNT_TYPE_MISC,
1983 		    NULL, txq->vxtxq_name, "Watchdog timeout");
1984 		evcnt_attach_dynamic(&txq->vxtxq_defragged, EVCNT_TYPE_MISC,
1985 		    NULL, txq->vxtxq_name, "m_defrag successed");
1986 		evcnt_attach_dynamic(&txq->vxtxq_defrag_failed, EVCNT_TYPE_MISC,
1987 		    NULL, txq->vxtxq_name, "m_defrag failed");
1988 	}
1989 
1990 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1991 		vmxq = &sc->vmx_queue[i];
1992 		rxq = &vmxq->vxq_rxqueue;
1993 		evcnt_attach_dynamic(&rxq->vxrxq_intr, EVCNT_TYPE_INTR,
1994 		    NULL, rxq->vxrxq_name, "Interrupt on queue");
1995 		evcnt_attach_dynamic(&rxq->vxrxq_defer, EVCNT_TYPE_MISC,
1996 		    NULL, rxq->vxrxq_name, "Handled queue in softint/workqueue");
1997 		evcnt_attach_dynamic(&rxq->vxrxq_deferreq, EVCNT_TYPE_MISC,
1998 		    NULL, rxq->vxrxq_name, "Requested in softint/workqueue");
1999 		evcnt_attach_dynamic(&rxq->vxrxq_mgetcl_failed, EVCNT_TYPE_MISC,
2000 		    NULL, rxq->vxrxq_name, "MCLGET failed");
2001 		evcnt_attach_dynamic(&rxq->vxrxq_mbuf_load_failed, EVCNT_TYPE_MISC,
2002 		    NULL, rxq->vxrxq_name, "bus_dmamap_load_mbuf failed");
2003 	}
2004 
2005 	evcnt_attach_dynamic(&sc->vmx_event_intr, EVCNT_TYPE_INTR,
2006 	    NULL, device_xname(sc->vmx_dev), "Interrupt for other events");
2007 	evcnt_attach_dynamic(&sc->vmx_event_link, EVCNT_TYPE_MISC,
2008 	    NULL, device_xname(sc->vmx_dev), "Link status event");
2009 	evcnt_attach_dynamic(&sc->vmx_event_txqerror, EVCNT_TYPE_MISC,
2010 	    NULL, device_xname(sc->vmx_dev), "Tx queue error event");
2011 	evcnt_attach_dynamic(&sc->vmx_event_rxqerror, EVCNT_TYPE_MISC,
2012 	    NULL, device_xname(sc->vmx_dev), "Rx queue error event");
2013 	evcnt_attach_dynamic(&sc->vmx_event_dic, EVCNT_TYPE_MISC,
2014 	    NULL, device_xname(sc->vmx_dev), "Device impl change event");
2015 	evcnt_attach_dynamic(&sc->vmx_event_debug, EVCNT_TYPE_MISC,
2016 	    NULL, device_xname(sc->vmx_dev), "Debug event");
2017 
2018 	return 0;
2019 }
2020 
2021 static void
2022 vmxnet3_teardown_stats(struct vmxnet3_softc *sc)
2023 {
2024 	struct vmxnet3_queue *vmxq;
2025 	struct vmxnet3_txqueue *txq;
2026 	struct vmxnet3_rxqueue *rxq;
2027 	int i;
2028 
2029 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2030 		vmxq = &sc->vmx_queue[i];
2031 		txq = &vmxq->vxq_txqueue;
2032 		evcnt_detach(&txq->vxtxq_intr);
2033 		evcnt_detach(&txq->vxtxq_defer);
2034 		evcnt_detach(&txq->vxtxq_deferreq);
2035 		evcnt_detach(&txq->vxtxq_pcqdrop);
2036 		evcnt_detach(&txq->vxtxq_transmitdef);
2037 		evcnt_detach(&txq->vxtxq_watchdogto);
2038 		evcnt_detach(&txq->vxtxq_defragged);
2039 		evcnt_detach(&txq->vxtxq_defrag_failed);
2040 	}
2041 
2042 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2043 		vmxq = &sc->vmx_queue[i];
2044 		rxq = &vmxq->vxq_rxqueue;
2045 		evcnt_detach(&rxq->vxrxq_intr);
2046 		evcnt_detach(&rxq->vxrxq_defer);
2047 		evcnt_detach(&rxq->vxrxq_deferreq);
2048 		evcnt_detach(&rxq->vxrxq_mgetcl_failed);
2049 		evcnt_detach(&rxq->vxrxq_mbuf_load_failed);
2050 	}
2051 
2052 	evcnt_detach(&sc->vmx_event_intr);
2053 	evcnt_detach(&sc->vmx_event_link);
2054 	evcnt_detach(&sc->vmx_event_txqerror);
2055 	evcnt_detach(&sc->vmx_event_rxqerror);
2056 	evcnt_detach(&sc->vmx_event_dic);
2057 	evcnt_detach(&sc->vmx_event_debug);
2058 }
2059 
2060 static void
2061 vmxnet3_evintr(struct vmxnet3_softc *sc)
2062 {
2063 	device_t dev;
2064 	struct vmxnet3_txq_shared *ts;
2065 	struct vmxnet3_rxq_shared *rs;
2066 	uint32_t event;
2067 	int reset;
2068 
2069 	dev = sc->vmx_dev;
2070 	reset = 0;
2071 
2072 	VMXNET3_CORE_LOCK(sc);
2073 
2074 	/* Clear events. */
2075 	event = sc->vmx_ds->event;
2076 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
2077 
2078 	if (event & VMXNET3_EVENT_LINK) {
2079 		sc->vmx_event_link.ev_count++;
2080 		vmxnet3_if_link_status(sc);
2081 		if (sc->vmx_link_active != 0)
2082 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2083 	}
2084 
2085 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
2086 		if (event & VMXNET3_EVENT_TQERROR)
2087 			sc->vmx_event_txqerror.ev_count++;
2088 		if (event & VMXNET3_EVENT_RQERROR)
2089 			sc->vmx_event_rxqerror.ev_count++;
2090 
2091 		reset = 1;
2092 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
2093 		ts = sc->vmx_queue[0].vxq_txqueue.vxtxq_ts;
2094 		if (ts->stopped != 0)
2095 			device_printf(dev, "Tx queue error %#x\n", ts->error);
2096 		rs = sc->vmx_queue[0].vxq_rxqueue.vxrxq_rs;
2097 		if (rs->stopped != 0)
2098 			device_printf(dev, "Rx queue error %#x\n", rs->error);
2099 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
2100 	}
2101 
2102 	if (event & VMXNET3_EVENT_DIC) {
2103 		sc->vmx_event_dic.ev_count++;
2104 		device_printf(dev, "device implementation change event\n");
2105 	}
2106 	if (event & VMXNET3_EVENT_DEBUG) {
2107 		sc->vmx_event_debug.ev_count++;
2108 		device_printf(dev, "debug event\n");
2109 	}
2110 
2111 	if (reset != 0)
2112 		vmxnet3_init_locked(sc);
2113 
2114 	VMXNET3_CORE_UNLOCK(sc);
2115 }
2116 
2117 static bool
2118 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq, u_int limit)
2119 {
2120 	struct vmxnet3_softc *sc;
2121 	struct vmxnet3_txring *txr;
2122 	struct vmxnet3_comp_ring *txc;
2123 	struct vmxnet3_txcompdesc *txcd;
2124 	struct vmxnet3_txbuf *txb;
2125 	struct ifnet *ifp;
2126 	struct mbuf *m;
2127 	u_int sop;
2128 	bool more = false;
2129 
2130 	sc = txq->vxtxq_sc;
2131 	txr = &txq->vxtxq_cmd_ring;
2132 	txc = &txq->vxtxq_comp_ring;
2133 	ifp = &sc->vmx_ethercom.ec_if;
2134 
2135 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2136 
2137 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2138 	for (;;) {
2139 		if (limit-- == 0) {
2140 			more = true;
2141 			break;
2142 		}
2143 
2144 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
2145 		if (txcd->gen != txc->vxcr_gen)
2146 			break;
2147 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2148 
2149 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
2150 			txc->vxcr_next = 0;
2151 			txc->vxcr_gen ^= 1;
2152 		}
2153 
2154 		sop = txr->vxtxr_next;
2155 		txb = &txr->vxtxr_txbuf[sop];
2156 
2157 		if ((m = txb->vtxb_m) != NULL) {
2158 			bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2159 			    0, txb->vtxb_dmamap->dm_mapsize,
2160 			    BUS_DMASYNC_POSTWRITE);
2161 			bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2162 
2163 			if_statinc_ref(nsr, if_opackets);
2164 			if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
2165 			if (m->m_flags & M_MCAST)
2166 				if_statinc_ref(nsr, if_omcasts);
2167 
2168 			m_freem(m);
2169 			txb->vtxb_m = NULL;
2170 		}
2171 
2172 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
2173 	}
2174 	IF_STAT_PUTREF(ifp);
2175 
2176 	if (txr->vxtxr_head == txr->vxtxr_next)
2177 		txq->vxtxq_watchdog = 0;
2178 
2179 	return more;
2180 }
2181 
2182 static int
2183 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
2184     struct vmxnet3_rxring *rxr)
2185 {
2186 	struct mbuf *m;
2187 	struct vmxnet3_rxdesc *rxd;
2188 	struct vmxnet3_rxbuf *rxb;
2189 	bus_dma_tag_t tag;
2190 	bus_dmamap_t dmap;
2191 	int idx, btype, error;
2192 
2193 	tag = sc->vmx_dmat;
2194 	dmap = rxr->vxrxr_spare_dmap;
2195 	idx = rxr->vxrxr_fill;
2196 	rxd = &rxr->vxrxr_rxd[idx];
2197 	rxb = &rxr->vxrxr_rxbuf[idx];
2198 
2199 	/* Don't allocate buffers for ring 2 for now. */
2200 	if (rxr->vxrxr_rid != 0)
2201 		return -1;
2202 	btype = VMXNET3_BTYPE_HEAD;
2203 
2204 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2205 	if (m == NULL)
2206 		return (ENOBUFS);
2207 
2208 	MCLGET(m, M_DONTWAIT);
2209 	if ((m->m_flags & M_EXT) == 0) {
2210 		rxq->vxrxq_mgetcl_failed.ev_count++;
2211 		m_freem(m);
2212 		return (ENOBUFS);
2213 	}
2214 
2215 	m->m_pkthdr.len = m->m_len = JUMBO_LEN;
2216 	m_adj(m, ETHER_ALIGN);
2217 
2218 	error = bus_dmamap_load_mbuf(sc->vmx_dmat, dmap, m, BUS_DMA_NOWAIT);
2219 	if (error) {
2220 		m_freem(m);
2221 		rxq->vxrxq_mbuf_load_failed.ev_count++;
2222 		return (error);
2223 	}
2224 
2225 	if (rxb->vrxb_m != NULL) {
2226 		bus_dmamap_sync(tag, rxb->vrxb_dmamap,
2227 		    0, rxb->vrxb_dmamap->dm_mapsize,
2228 		    BUS_DMASYNC_POSTREAD);
2229 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2230 	}
2231 
2232 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2233 	rxb->vrxb_dmamap = dmap;
2234 	rxb->vrxb_m = m;
2235 
2236 	rxd->addr = DMAADDR(dmap);
2237 	rxd->len = m->m_pkthdr.len;
2238 	rxd->btype = btype;
2239 	rxd->gen = rxr->vxrxr_gen;
2240 
2241 	vmxnet3_rxr_increment_fill(rxr);
2242 	return (0);
2243 }
2244 
2245 static void
2246 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2247     struct vmxnet3_rxring *rxr, int idx)
2248 {
2249 	struct vmxnet3_rxdesc *rxd;
2250 
2251 	rxd = &rxr->vxrxr_rxd[idx];
2252 	rxd->gen = rxr->vxrxr_gen;
2253 	vmxnet3_rxr_increment_fill(rxr);
2254 }
2255 
2256 static void
2257 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2258 {
2259 	struct vmxnet3_softc *sc;
2260 	struct vmxnet3_rxring *rxr;
2261 	struct vmxnet3_comp_ring *rxc;
2262 	struct vmxnet3_rxcompdesc *rxcd;
2263 	int idx, eof;
2264 
2265 	sc = rxq->vxrxq_sc;
2266 	rxc = &rxq->vxrxq_comp_ring;
2267 
2268 	do {
2269 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2270 		if (rxcd->gen != rxc->vxcr_gen)
2271 			break;		/* Not expected. */
2272 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2273 
2274 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2275 			rxc->vxcr_next = 0;
2276 			rxc->vxcr_gen ^= 1;
2277 		}
2278 
2279 		idx = rxcd->rxd_idx;
2280 		eof = rxcd->eop;
2281 		if (rxcd->qid < sc->vmx_nrxqueues)
2282 			rxr = &rxq->vxrxq_cmd_ring[0];
2283 		else
2284 			rxr = &rxq->vxrxq_cmd_ring[1];
2285 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2286 	} while (!eof);
2287 }
2288 
2289 static void
2290 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2291 {
2292 	if (rxcd->no_csum)
2293 		return;
2294 
2295 	if (rxcd->ipv4) {
2296 		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
2297 		if (rxcd->ipcsum_ok == 0)
2298 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
2299 	}
2300 
2301 	if (rxcd->fragment)
2302 		return;
2303 
2304 	if (rxcd->tcp) {
2305 		m->m_pkthdr.csum_flags |=
2306 		    rxcd->ipv4 ? M_CSUM_TCPv4 : M_CSUM_TCPv6;
2307 		if ((rxcd->csum_ok) == 0)
2308 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2309 	}
2310 
2311 	if (rxcd->udp) {
2312 		m->m_pkthdr.csum_flags |=
2313 		    rxcd->ipv4 ? M_CSUM_UDPv4 : M_CSUM_UDPv6 ;
2314 		if ((rxcd->csum_ok) == 0)
2315 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2316 	}
2317 }
2318 
2319 static void
2320 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2321     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2322 {
2323 	struct vmxnet3_softc *sc;
2324 	struct ifnet *ifp;
2325 
2326 	sc = rxq->vxrxq_sc;
2327 	ifp = &sc->vmx_ethercom.ec_if;
2328 
2329 	if (rxcd->error) {
2330 		if_statinc(ifp, if_ierrors);
2331 		m_freem(m);
2332 		return;
2333 	}
2334 
2335 	if (!rxcd->no_csum)
2336 		vmxnet3_rx_csum(rxcd, m);
2337 	if (rxcd->vlan)
2338 		vlan_set_tag(m, rxcd->vtag);
2339 
2340 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2341 	if_statinc_ref(nsr, if_ipackets);
2342 	if_statadd_ref(nsr, if_ibytes, m->m_pkthdr.len);
2343 	IF_STAT_PUTREF(ifp);
2344 
2345 	if_percpuq_enqueue(ifp->if_percpuq, m);
2346 }
2347 
2348 static bool
2349 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
2350 {
2351 	struct vmxnet3_softc *sc;
2352 	struct ifnet *ifp;
2353 	struct vmxnet3_rxring *rxr;
2354 	struct vmxnet3_comp_ring *rxc;
2355 	struct vmxnet3_rxdesc *rxd __diagused;
2356 	struct vmxnet3_rxcompdesc *rxcd;
2357 	struct mbuf *m, *m_head, *m_tail;
2358 	u_int idx, length;
2359 	bool more = false;
2360 
2361 	sc = rxq->vxrxq_sc;
2362 	ifp = &sc->vmx_ethercom.ec_if;
2363 	rxc = &rxq->vxrxq_comp_ring;
2364 
2365 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2366 
2367 	if (rxq->vxrxq_stopping)
2368 		return more;
2369 
2370 	m_head = rxq->vxrxq_mhead;
2371 	rxq->vxrxq_mhead = NULL;
2372 	m_tail = rxq->vxrxq_mtail;
2373 	rxq->vxrxq_mtail = NULL;
2374 	KASSERT(m_head == NULL || m_tail != NULL);
2375 
2376 	for (;;) {
2377 		if (limit-- == 0) {
2378 			more = true;
2379 			break;
2380 		}
2381 
2382 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2383 		if (rxcd->gen != rxc->vxcr_gen) {
2384 			rxq->vxrxq_mhead = m_head;
2385 			rxq->vxrxq_mtail = m_tail;
2386 			break;
2387 		}
2388 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2389 
2390 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2391 			rxc->vxcr_next = 0;
2392 			rxc->vxcr_gen ^= 1;
2393 		}
2394 
2395 		idx = rxcd->rxd_idx;
2396 		length = rxcd->len;
2397 		if (rxcd->qid < sc->vmx_nrxqueues)
2398 			rxr = &rxq->vxrxq_cmd_ring[0];
2399 		else
2400 			rxr = &rxq->vxrxq_cmd_ring[1];
2401 		rxd = &rxr->vxrxr_rxd[idx];
2402 
2403 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2404 		KASSERT(m != NULL);
2405 
2406 		/*
2407 		 * The host may skip descriptors. We detect this when this
2408 		 * descriptor does not match the previous fill index. Catch
2409 		 * up with the host now.
2410 		 */
2411 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2412 			while (rxr->vxrxr_fill != idx) {
2413 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2414 				    rxr->vxrxr_gen;
2415 				vmxnet3_rxr_increment_fill(rxr);
2416 			}
2417 		}
2418 
2419 		if (rxcd->sop) {
2420 			/* start of frame w/o head buffer */
2421 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD);
2422 			/* start of frame not in ring 0 */
2423 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0]);
2424 			/* duplicate start of frame? */
2425 			KASSERT(m_head == NULL);
2426 
2427 			if (length == 0) {
2428 				/* Just ignore this descriptor. */
2429 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2430 				goto nextp;
2431 			}
2432 
2433 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2434 				if_statinc(ifp, if_iqdrops);
2435 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2436 				if (!rxcd->eop)
2437 					vmxnet3_rxq_discard_chain(rxq);
2438 				goto nextp;
2439 			}
2440 
2441 			m_set_rcvif(m, ifp);
2442 			m->m_pkthdr.len = m->m_len = length;
2443 			m->m_pkthdr.csum_flags = 0;
2444 			m_head = m_tail = m;
2445 
2446 		} else {
2447 			/* non start of frame w/o body buffer */
2448 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY);
2449 			/* frame not started? */
2450 			KASSERT(m_head != NULL);
2451 
2452 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2453 				if_statinc(ifp, if_iqdrops);
2454 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2455 				if (!rxcd->eop)
2456 					vmxnet3_rxq_discard_chain(rxq);
2457 				m_freem(m_head);
2458 				m_head = m_tail = NULL;
2459 				goto nextp;
2460 			}
2461 
2462 			m->m_len = length;
2463 			m_head->m_pkthdr.len += length;
2464 			m_tail->m_next = m;
2465 			m_tail = m;
2466 		}
2467 
2468 		if (rxcd->eop) {
2469 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2470 			m_head = m_tail = NULL;
2471 
2472 			/* Must recheck after dropping the Rx lock. */
2473 			if (rxq->vxrxq_stopping)
2474 				break;
2475 		}
2476 
2477 nextp:
2478 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2479 			int qid = rxcd->qid;
2480 			bus_size_t r;
2481 
2482 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2483 			if (qid >= sc->vmx_nrxqueues) {
2484 				qid -= sc->vmx_nrxqueues;
2485 				r = VMXNET3_BAR0_RXH2(qid);
2486 			} else
2487 				r = VMXNET3_BAR0_RXH1(qid);
2488 			vmxnet3_write_bar0(sc, r, idx);
2489 		}
2490 	}
2491 
2492 	return more;
2493 }
2494 
2495 static inline void
2496 vmxnet3_sched_handle_queue(struct vmxnet3_softc *sc, struct vmxnet3_queue *vmxq)
2497 {
2498 
2499 	if (vmxq->vxq_workqueue) {
2500 		/*
2501 		 * When this function is called, "vmxq" is owned by one CPU.
2502 		 * so, atomic operation is not required here.
2503 		 */
2504 		if (!vmxq->vxq_wq_enqueued) {
2505 			vmxq->vxq_wq_enqueued = true;
2506 			workqueue_enqueue(sc->vmx_queue_wq,
2507 			    &vmxq->vxq_wq_cookie, curcpu());
2508 		}
2509 	} else {
2510 		softint_schedule(vmxq->vxq_si);
2511 	}
2512 }
2513 
2514 static int
2515 vmxnet3_legacy_intr(void *xsc)
2516 {
2517 	struct vmxnet3_softc *sc;
2518 	struct vmxnet3_queue *vmxq;
2519 	struct vmxnet3_txqueue *txq;
2520 	struct vmxnet3_rxqueue *rxq;
2521 	u_int txlimit, rxlimit;
2522 	bool txmore, rxmore;
2523 
2524 	sc = xsc;
2525 	vmxq = &sc->vmx_queue[0];
2526 	txq = &vmxq->vxq_txqueue;
2527 	rxq = &vmxq->vxq_rxqueue;
2528 	txlimit = sc->vmx_tx_intr_process_limit;
2529 	rxlimit = sc->vmx_rx_intr_process_limit;
2530 
2531 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2532 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2533 			return (0);
2534 	}
2535 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2536 		vmxnet3_disable_all_intrs(sc);
2537 
2538 	if (sc->vmx_ds->event != 0)
2539 		vmxnet3_evintr(sc);
2540 
2541 	VMXNET3_TXQ_LOCK(txq);
2542 	txmore = vmxnet3_txq_eof(txq, txlimit);
2543 	VMXNET3_TXQ_UNLOCK(txq);
2544 
2545 	VMXNET3_RXQ_LOCK(rxq);
2546 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2547 	VMXNET3_RXQ_UNLOCK(rxq);
2548 
2549 	if (txmore || rxmore)
2550 		vmxnet3_sched_handle_queue(sc, vmxq);
2551 	else {
2552 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2553 		vmxnet3_enable_all_intrs(sc);
2554 	}
2555 
2556 	return (1);
2557 }
2558 
2559 static int
2560 vmxnet3_txrxq_intr(void *xvmxq)
2561 {
2562 	struct vmxnet3_softc *sc;
2563 	struct vmxnet3_queue *vmxq;
2564 	struct vmxnet3_txqueue *txq;
2565 	struct vmxnet3_rxqueue *rxq;
2566 	u_int txlimit, rxlimit;
2567 	bool txmore, rxmore;
2568 
2569 	vmxq = xvmxq;
2570 	txq = &vmxq->vxq_txqueue;
2571 	rxq = &vmxq->vxq_rxqueue;
2572 	sc = txq->vxtxq_sc;
2573 	txlimit = sc->vmx_tx_intr_process_limit;
2574 	rxlimit = sc->vmx_rx_intr_process_limit;
2575 	vmxq->vxq_workqueue = sc->vmx_txrx_workqueue;
2576 
2577 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2578 		vmxnet3_disable_intr(sc, vmxq->vxq_intr_idx);
2579 
2580 	VMXNET3_TXQ_LOCK(txq);
2581 	txq->vxtxq_intr.ev_count++;
2582 	txmore = vmxnet3_txq_eof(txq, txlimit);
2583 	VMXNET3_TXQ_UNLOCK(txq);
2584 
2585 	VMXNET3_RXQ_LOCK(rxq);
2586 	rxq->vxrxq_intr.ev_count++;
2587 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2588 	VMXNET3_RXQ_UNLOCK(rxq);
2589 
2590 	if (txmore || rxmore)
2591 		vmxnet3_sched_handle_queue(sc, vmxq);
2592 	else {
2593 		/* for ALTQ */
2594 		if (vmxq->vxq_id == 0)
2595 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2596 		softint_schedule(txq->vxtxq_si);
2597 
2598 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2599 	}
2600 
2601 	return (1);
2602 }
2603 
2604 static void
2605 vmxnet3_handle_queue(void *xvmxq)
2606 {
2607 	struct vmxnet3_softc *sc;
2608 	struct vmxnet3_queue *vmxq;
2609 	struct vmxnet3_txqueue *txq;
2610 	struct vmxnet3_rxqueue *rxq;
2611 	u_int txlimit, rxlimit;
2612 	bool txmore, rxmore;
2613 
2614 	vmxq = xvmxq;
2615 	txq = &vmxq->vxq_txqueue;
2616 	rxq = &vmxq->vxq_rxqueue;
2617 	sc = txq->vxtxq_sc;
2618 	txlimit = sc->vmx_tx_process_limit;
2619 	rxlimit = sc->vmx_rx_process_limit;
2620 
2621 	VMXNET3_TXQ_LOCK(txq);
2622 	txq->vxtxq_defer.ev_count++;
2623 	txmore = vmxnet3_txq_eof(txq, txlimit);
2624 	if (txmore)
2625 		txq->vxtxq_deferreq.ev_count++;
2626 	/* for ALTQ */
2627 	if (vmxq->vxq_id == 0)
2628 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2629 	softint_schedule(txq->vxtxq_si);
2630 	VMXNET3_TXQ_UNLOCK(txq);
2631 
2632 	VMXNET3_RXQ_LOCK(rxq);
2633 	rxq->vxrxq_defer.ev_count++;
2634 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2635 	if (rxmore)
2636 		rxq->vxrxq_deferreq.ev_count++;
2637 	VMXNET3_RXQ_UNLOCK(rxq);
2638 
2639 	if (txmore || rxmore)
2640 		vmxnet3_sched_handle_queue(sc, vmxq);
2641 	else
2642 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2643 }
2644 
2645 static void
2646 vmxnet3_handle_queue_work(struct work *wk, void *context)
2647 {
2648 	struct vmxnet3_queue *vmxq;
2649 
2650 	vmxq = container_of(wk, struct vmxnet3_queue, vxq_wq_cookie);
2651 	vmxq->vxq_wq_enqueued = false;
2652 	vmxnet3_handle_queue(vmxq);
2653 }
2654 
2655 static int
2656 vmxnet3_event_intr(void *xsc)
2657 {
2658 	struct vmxnet3_softc *sc;
2659 
2660 	sc = xsc;
2661 
2662 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2663 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2664 
2665 	sc->vmx_event_intr.ev_count++;
2666 
2667 	if (sc->vmx_ds->event != 0)
2668 		vmxnet3_evintr(sc);
2669 
2670 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2671 
2672 	return (1);
2673 }
2674 
2675 static void
2676 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2677 {
2678 	struct vmxnet3_txring *txr;
2679 	struct vmxnet3_txbuf *txb;
2680 	u_int i;
2681 
2682 	txr = &txq->vxtxq_cmd_ring;
2683 
2684 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2685 		txb = &txr->vxtxr_txbuf[i];
2686 
2687 		if (txb->vtxb_m == NULL)
2688 			continue;
2689 
2690 		bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2691 		    0, txb->vtxb_dmamap->dm_mapsize,
2692 		    BUS_DMASYNC_POSTWRITE);
2693 		bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2694 		m_freem(txb->vtxb_m);
2695 		txb->vtxb_m = NULL;
2696 	}
2697 }
2698 
2699 static void
2700 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2701 {
2702 	struct vmxnet3_rxring *rxr;
2703 	struct vmxnet3_rxbuf *rxb;
2704 	u_int i, j;
2705 
2706 	if (rxq->vxrxq_mhead != NULL) {
2707 		m_freem(rxq->vxrxq_mhead);
2708 		rxq->vxrxq_mhead = NULL;
2709 		rxq->vxrxq_mtail = NULL;
2710 	}
2711 
2712 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2713 		rxr = &rxq->vxrxq_cmd_ring[i];
2714 
2715 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2716 			rxb = &rxr->vxrxr_rxbuf[j];
2717 
2718 			if (rxb->vrxb_m == NULL)
2719 				continue;
2720 
2721 			bus_dmamap_sync(sc->vmx_dmat, rxb->vrxb_dmamap,
2722 			    0, rxb->vrxb_dmamap->dm_mapsize,
2723 			    BUS_DMASYNC_POSTREAD);
2724 			bus_dmamap_unload(sc->vmx_dmat, rxb->vrxb_dmamap);
2725 			m_freem(rxb->vrxb_m);
2726 			rxb->vrxb_m = NULL;
2727 		}
2728 	}
2729 }
2730 
2731 static void
2732 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2733 {
2734 	struct vmxnet3_rxqueue *rxq;
2735 	struct vmxnet3_txqueue *txq;
2736 	struct vmxnet3_queue *vmxq;
2737 	int i;
2738 
2739 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2740 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
2741 		VMXNET3_RXQ_LOCK(rxq);
2742 		rxq->vxrxq_stopping = true;
2743 		VMXNET3_RXQ_UNLOCK(rxq);
2744 	}
2745 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2746 		txq = &sc->vmx_queue[i].vxq_txqueue;
2747 		VMXNET3_TXQ_LOCK(txq);
2748 		txq->vxtxq_stopping = true;
2749 		VMXNET3_TXQ_UNLOCK(txq);
2750 	}
2751 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2752 		vmxq = &sc->vmx_queue[i];
2753 		workqueue_wait(sc->vmx_queue_wq, &vmxq->vxq_wq_cookie);
2754 	}
2755 }
2756 
2757 static void
2758 vmxnet3_stop_locked(struct vmxnet3_softc *sc)
2759 {
2760 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
2761 	int q;
2762 
2763 	VMXNET3_CORE_LOCK_ASSERT(sc);
2764 	KASSERT(IFNET_LOCKED(ifp));
2765 
2766 	vmxnet3_stop_rendezvous(sc);
2767 
2768 	sc->vmx_mcastactive = false;
2769 	sc->vmx_link_active = 0;
2770 	callout_halt(&sc->vmx_tick, sc->vmx_mtx);
2771 
2772 	ifp->if_flags &= ~IFF_RUNNING;
2773 
2774 	/* Disable interrupts. */
2775 	vmxnet3_disable_all_intrs(sc);
2776 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2777 
2778 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2779 		vmxnet3_txstop(sc, &sc->vmx_queue[q].vxq_txqueue);
2780 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2781 		vmxnet3_rxstop(sc, &sc->vmx_queue[q].vxq_rxqueue);
2782 
2783 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2784 }
2785 
2786 static void
2787 vmxnet3_stop(struct ifnet *ifp, int disable)
2788 {
2789 	struct vmxnet3_softc *sc = ifp->if_softc;
2790 
2791 	KASSERT(IFNET_LOCKED(ifp));
2792 
2793 	VMXNET3_CORE_LOCK(sc);
2794 	vmxnet3_stop_locked(sc);
2795 	VMXNET3_CORE_UNLOCK(sc);
2796 }
2797 
2798 static void
2799 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2800 {
2801 	struct vmxnet3_txring *txr;
2802 	struct vmxnet3_comp_ring *txc;
2803 
2804 	txr = &txq->vxtxq_cmd_ring;
2805 	txr->vxtxr_head = 0;
2806 	txr->vxtxr_next = 0;
2807 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2808 	memset(txr->vxtxr_txd, 0,
2809 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2810 
2811 	txc = &txq->vxtxq_comp_ring;
2812 	txc->vxcr_next = 0;
2813 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2814 	memset(txc->vxcr_u.txcd, 0,
2815 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2816 }
2817 
2818 static int
2819 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2820 {
2821 	struct vmxnet3_rxring *rxr;
2822 	struct vmxnet3_comp_ring *rxc;
2823 	u_int i, populate, idx;
2824 	int error;
2825 
2826 	/* LRO and jumbo frame is not supported yet */
2827 	populate = 1;
2828 
2829 	for (i = 0; i < populate; i++) {
2830 		rxr = &rxq->vxrxq_cmd_ring[i];
2831 		rxr->vxrxr_fill = 0;
2832 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2833 		memset(rxr->vxrxr_rxd, 0,
2834 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2835 
2836 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2837 			error = vmxnet3_newbuf(sc, rxq, rxr);
2838 			if (error)
2839 				return (error);
2840 		}
2841 	}
2842 
2843 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2844 		rxr = &rxq->vxrxq_cmd_ring[i];
2845 		rxr->vxrxr_fill = 0;
2846 		rxr->vxrxr_gen = 0;
2847 		memset(rxr->vxrxr_rxd, 0,
2848 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2849 	}
2850 
2851 	rxc = &rxq->vxrxq_comp_ring;
2852 	rxc->vxcr_next = 0;
2853 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2854 	memset(rxc->vxcr_u.rxcd, 0,
2855 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2856 
2857 	return (0);
2858 }
2859 
2860 static int
2861 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2862 {
2863 	device_t dev;
2864 	int q, error;
2865 	dev = sc->vmx_dev;
2866 
2867 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2868 		vmxnet3_txinit(sc, &sc->vmx_queue[q].vxq_txqueue);
2869 
2870 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2871 		error = vmxnet3_rxinit(sc, &sc->vmx_queue[q].vxq_rxqueue);
2872 		if (error) {
2873 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2874 			return (error);
2875 		}
2876 	}
2877 
2878 	return (0);
2879 }
2880 
2881 static int
2882 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2883 {
2884 	int q;
2885 
2886 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2887 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2888 		return (1);
2889 	}
2890 
2891 	/* Reset the Rx queue heads. */
2892 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2893 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2894 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2895 	}
2896 
2897 	return (0);
2898 }
2899 
2900 static void
2901 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2902 {
2903 
2904 	vmxnet3_set_rxfilter(sc);
2905 
2906 	memset(sc->vmx_ds->vlan_filter, 0, sizeof(sc->vmx_ds->vlan_filter));
2907 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2908 }
2909 
2910 static int
2911 vmxnet3_reinit(struct vmxnet3_softc *sc)
2912 {
2913 
2914 	VMXNET3_CORE_LOCK_ASSERT(sc);
2915 
2916 	vmxnet3_set_lladdr(sc);
2917 	vmxnet3_reinit_shared_data(sc);
2918 
2919 	if (vmxnet3_reinit_queues(sc) != 0)
2920 		return (ENXIO);
2921 
2922 	if (vmxnet3_enable_device(sc) != 0)
2923 		return (ENXIO);
2924 
2925 	vmxnet3_reinit_rxfilters(sc);
2926 
2927 	return (0);
2928 }
2929 
2930 static int
2931 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2932 {
2933 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
2934 	int q;
2935 	int error;
2936 
2937 	KASSERT(IFNET_LOCKED(ifp));
2938 	VMXNET3_CORE_LOCK_ASSERT(sc);
2939 
2940 	vmxnet3_stop_locked(sc);
2941 
2942 	error = vmxnet3_reinit(sc);
2943 	if (error) {
2944 		vmxnet3_stop_locked(sc);
2945 		return (error);
2946 	}
2947 
2948 	ifp->if_flags |= IFF_RUNNING;
2949 	vmxnet3_if_link_status(sc);
2950 	sc->vmx_mcastactive = true;
2951 
2952 	vmxnet3_enable_all_intrs(sc);
2953 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2954 
2955 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
2956 		VMXNET3_TXQ_LOCK(&sc->vmx_queue[q].vxq_txqueue);
2957 		sc->vmx_queue[q].vxq_txqueue.vxtxq_stopping = false;
2958 		VMXNET3_TXQ_UNLOCK(&sc->vmx_queue[q].vxq_txqueue);
2959 	}
2960 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2961 		VMXNET3_RXQ_LOCK(&sc->vmx_queue[q].vxq_rxqueue);
2962 		sc->vmx_queue[q].vxq_rxqueue.vxrxq_stopping = false;
2963 		VMXNET3_RXQ_UNLOCK(&sc->vmx_queue[q].vxq_rxqueue);
2964 	}
2965 
2966 	return (0);
2967 }
2968 
2969 static int
2970 vmxnet3_init(struct ifnet *ifp)
2971 {
2972 	struct vmxnet3_softc *sc = ifp->if_softc;
2973 	int error;
2974 
2975 	KASSERT(IFNET_LOCKED(ifp));
2976 
2977 	VMXNET3_CORE_LOCK(sc);
2978 	error = vmxnet3_init_locked(sc);
2979 	VMXNET3_CORE_UNLOCK(sc);
2980 
2981 	return (error);
2982 }
2983 
2984 static int
2985 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2986     int *start, int *csum_start)
2987 {
2988 	struct ether_header *eh;
2989 	struct mbuf *mp;
2990 	int offset, csum_off, iphl, offp;
2991 	bool v4;
2992 
2993 	eh = mtod(m, struct ether_header *);
2994 	switch (htons(eh->ether_type)) {
2995 	case ETHERTYPE_IP:
2996 	case ETHERTYPE_IPV6:
2997 		offset = ETHER_HDR_LEN;
2998 		break;
2999 	case ETHERTYPE_VLAN:
3000 		offset = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3001 		break;
3002 	default:
3003 		m_freem(m);
3004 		return (EINVAL);
3005 	}
3006 
3007 	if ((m->m_pkthdr.csum_flags &
3008 	    (M_CSUM_TSOv4 | M_CSUM_UDPv4 | M_CSUM_TCPv4)) != 0) {
3009 		iphl = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
3010 		v4 = true;
3011 	} else {
3012 		iphl = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
3013 		v4 = false;
3014 	}
3015 	*start = offset + iphl;
3016 
3017 	if (m->m_pkthdr.csum_flags &
3018 	    (M_CSUM_TCPv4 | M_CSUM_TCPv6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3019 		csum_off = offsetof(struct tcphdr, th_sum);
3020 	} else {
3021 		csum_off = offsetof(struct udphdr, uh_sum);
3022 	}
3023 
3024 	*csum_start = *start + csum_off;
3025 	mp = m_pulldown(m, 0, *csum_start + 2, &offp);
3026 	if (!mp) {
3027 		/* m is already freed */
3028 		return ENOBUFS;
3029 	}
3030 
3031 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3032 		struct tcphdr *tcp;
3033 
3034 		txq->vxtxq_stats.vmtxs_tso++;
3035 		tcp = (void *)(mtod(mp, char *) + offp + *start);
3036 
3037 		if (v4) {
3038 			struct ip *ip;
3039 
3040 			ip = (void *)(mtod(mp, char *) + offp + offset);
3041 			tcp->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3042 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3043 		} else {
3044 			struct ip6_hdr *ip6;
3045 
3046 			ip6 = (void *)(mtod(mp, char *) + offp + offset);
3047 			tcp->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3048 			    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3049 		}
3050 
3051 		/*
3052 		 * For TSO, the size of the protocol header is also
3053 		 * included in the descriptor header size.
3054 		 */
3055 		*start += (tcp->th_off << 2);
3056 	} else
3057 		txq->vxtxq_stats.vmtxs_csum++;
3058 
3059 	return (0);
3060 }
3061 
3062 static int
3063 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
3064     bus_dmamap_t dmap)
3065 {
3066 	struct mbuf *m;
3067 	bus_dma_tag_t tag;
3068 	int error;
3069 
3070 	m = *m0;
3071 	tag = txq->vxtxq_sc->vmx_dmat;
3072 
3073 	error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3074 	if (error == 0 || error != EFBIG)
3075 		return (error);
3076 
3077 	m = m_defrag(m, M_NOWAIT);
3078 	if (m != NULL) {
3079 		*m0 = m;
3080 		error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3081 	} else
3082 		error = ENOBUFS;
3083 
3084 	if (error) {
3085 		m_freem(*m0);
3086 		*m0 = NULL;
3087 		txq->vxtxq_defrag_failed.ev_count++;
3088 	} else
3089 		txq->vxtxq_defragged.ev_count++;
3090 
3091 	return (error);
3092 }
3093 
3094 static void
3095 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
3096 {
3097 
3098 	bus_dmamap_unload(txq->vxtxq_sc->vmx_dmat, dmap);
3099 }
3100 
3101 static int
3102 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
3103 {
3104 	struct vmxnet3_softc *sc;
3105 	struct vmxnet3_txring *txr;
3106 	struct vmxnet3_txdesc *txd, *sop;
3107 	struct mbuf *m;
3108 	bus_dmamap_t dmap;
3109 	bus_dma_segment_t *segs;
3110 	int i, gen, start, csum_start, nsegs, error;
3111 
3112 	sc = txq->vxtxq_sc;
3113 	start = 0;
3114 	txd = NULL;
3115 	txr = &txq->vxtxq_cmd_ring;
3116 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
3117 	csum_start = 0; /* GCC */
3118 
3119 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap);
3120 	if (error)
3121 		return (error);
3122 
3123 	nsegs = dmap->dm_nsegs;
3124 	segs = dmap->dm_segs;
3125 
3126 	m = *m0;
3127 	KASSERT(m->m_flags & M_PKTHDR);
3128 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS);
3129 
3130 	if (vmxnet3_txring_avail(txr) < nsegs) {
3131 		txq->vxtxq_stats.vmtxs_full++;
3132 		vmxnet3_txq_unload_mbuf(txq, dmap);
3133 		return (ENOSPC);
3134 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
3135 		error = vmxnet3_txq_offload_ctx(txq, m, &start, &csum_start);
3136 		if (error) {
3137 			/* m is already freed */
3138 			txq->vxtxq_stats.vmtxs_offload_failed++;
3139 			vmxnet3_txq_unload_mbuf(txq, dmap);
3140 			*m0 = NULL;
3141 			return (error);
3142 		}
3143 	}
3144 
3145 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
3146 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
3147 	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */
3148 
3149 	for (i = 0; i < nsegs; i++) {
3150 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
3151 
3152 		txd->addr = segs[i].ds_addr;
3153 		txd->len = segs[i].ds_len;
3154 		txd->gen = gen;
3155 		txd->dtype = 0;
3156 		txd->offload_mode = VMXNET3_OM_NONE;
3157 		txd->offload_pos = 0;
3158 		txd->hlen = 0;
3159 		txd->eop = 0;
3160 		txd->compreq = 0;
3161 		txd->vtag_mode = 0;
3162 		txd->vtag = 0;
3163 
3164 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
3165 			txr->vxtxr_head = 0;
3166 			txr->vxtxr_gen ^= 1;
3167 		}
3168 		gen = txr->vxtxr_gen;
3169 	}
3170 	txd->eop = 1;
3171 	txd->compreq = 1;
3172 
3173 	if (vlan_has_tag(m)) {
3174 		sop->vtag_mode = 1;
3175 		sop->vtag = vlan_get_tag(m);
3176 	}
3177 
3178 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3179 		sop->offload_mode = VMXNET3_OM_TSO;
3180 		sop->hlen = start;
3181 		sop->offload_pos = m->m_pkthdr.segsz;
3182 	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
3183 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
3184 		sop->offload_mode = VMXNET3_OM_CSUM;
3185 		sop->hlen = start;
3186 		sop->offload_pos = csum_start;
3187 	}
3188 
3189 	/* Finally, change the ownership. */
3190 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
3191 	sop->gen ^= 1;
3192 
3193 	txq->vxtxq_ts->npending += nsegs;
3194 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
3195 		struct vmxnet3_queue *vmxq;
3196 		vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3197 		txq->vxtxq_ts->npending = 0;
3198 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(vmxq->vxq_id),
3199 		    txr->vxtxr_head);
3200 	}
3201 
3202 	return (0);
3203 }
3204 
3205 #define VMXNET3_TX_START 1
3206 #define VMXNET3_TX_TRANSMIT 2
3207 static inline void
3208 vmxnet3_tx_common_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq, int txtype)
3209 {
3210 	struct vmxnet3_softc *sc;
3211 	struct vmxnet3_txring *txr;
3212 	struct mbuf *m_head;
3213 	int tx;
3214 
3215 	sc = ifp->if_softc;
3216 	txr = &txq->vxtxq_cmd_ring;
3217 	tx = 0;
3218 
3219 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3220 
3221 	if (txq->vxtxq_stopping || sc->vmx_link_active == 0)
3222 		return;
3223 
3224 	for (;;) {
3225 		if (txtype == VMXNET3_TX_START)
3226 			IFQ_POLL(&ifp->if_snd, m_head);
3227 		else
3228 			m_head = pcq_peek(txq->vxtxq_interq);
3229 		if (m_head == NULL)
3230 			break;
3231 
3232 		if (vmxnet3_txring_avail(txr) < VMXNET3_TX_MAXSEGS)
3233 			break;
3234 
3235 		if (txtype == VMXNET3_TX_START)
3236 			IFQ_DEQUEUE(&ifp->if_snd, m_head);
3237 		else
3238 			m_head = pcq_get(txq->vxtxq_interq);
3239 		if (m_head == NULL)
3240 			break;
3241 
3242 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3243 			if (m_head != NULL)
3244 				m_freem(m_head);
3245 			break;
3246 		}
3247 
3248 		tx++;
3249 		bpf_mtap(ifp, m_head, BPF_D_OUT);
3250 	}
3251 
3252 	if (tx > 0)
3253 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3254 }
3255 
3256 static void
3257 vmxnet3_start_locked(struct ifnet *ifp)
3258 {
3259 	struct vmxnet3_softc *sc;
3260 	struct vmxnet3_txqueue *txq;
3261 
3262 	sc = ifp->if_softc;
3263 	txq = &sc->vmx_queue[0].vxq_txqueue;
3264 
3265 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_START);
3266 }
3267 
3268 void
3269 vmxnet3_start(struct ifnet *ifp)
3270 {
3271 	struct vmxnet3_softc *sc;
3272 	struct vmxnet3_txqueue *txq;
3273 
3274 	sc = ifp->if_softc;
3275 	txq = &sc->vmx_queue[0].vxq_txqueue;
3276 
3277 	VMXNET3_TXQ_LOCK(txq);
3278 	vmxnet3_start_locked(ifp);
3279 	VMXNET3_TXQ_UNLOCK(txq);
3280 }
3281 
3282 static int
3283 vmxnet3_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
3284 {
3285 	struct vmxnet3_softc *sc;
3286 	u_int cpuid;
3287 
3288 	sc = ifp->if_softc;
3289 	cpuid = cpu_index(curcpu());
3290 	/*
3291 	 * Furure work
3292 	 * We should select txqueue to even up the load even if ncpu is
3293 	 * different from sc->vmx_ntxqueues. Currently, the load is not
3294 	 * even, that is, when ncpu is six and ntxqueues is four, the load
3295 	 * of vmx_queue[0] and vmx_queue[1] is higher than vmx_queue[2] and
3296 	 * vmx_queue[3] because CPU#4 always uses vmx_queue[0] and CPU#5 always
3297 	 * uses vmx_queue[1].
3298 	 * Furthermore, we should not use random value to select txqueue to
3299 	 * avoid reordering. We should use flow information of mbuf.
3300 	 */
3301 	return cpuid % sc->vmx_ntxqueues;
3302 }
3303 
3304 static void
3305 vmxnet3_transmit_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq)
3306 {
3307 
3308 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_TRANSMIT);
3309 }
3310 
3311 static int
3312 vmxnet3_transmit(struct ifnet *ifp, struct mbuf *m)
3313 {
3314 	struct vmxnet3_softc *sc;
3315 	struct vmxnet3_txqueue *txq;
3316 	int qid;
3317 
3318 	qid = vmxnet3_select_txqueue(ifp, m);
3319 	sc = ifp->if_softc;
3320 	txq = &sc->vmx_queue[qid].vxq_txqueue;
3321 
3322 	if (__predict_false(!pcq_put(txq->vxtxq_interq, m))) {
3323 		VMXNET3_TXQ_LOCK(txq);
3324 		txq->vxtxq_pcqdrop.ev_count++;
3325 		VMXNET3_TXQ_UNLOCK(txq);
3326 		m_freem(m);
3327 		return ENOBUFS;
3328 	}
3329 
3330 #ifdef VMXNET3_ALWAYS_TXDEFER
3331 	kpreempt_disable();
3332 	softint_schedule(txq->vxtxq_si);
3333 	kpreempt_enable();
3334 #else
3335 	if (VMXNET3_TXQ_TRYLOCK(txq)) {
3336 		vmxnet3_transmit_locked(ifp, txq);
3337 		VMXNET3_TXQ_UNLOCK(txq);
3338 	} else {
3339 		kpreempt_disable();
3340 		softint_schedule(txq->vxtxq_si);
3341 		kpreempt_enable();
3342 	}
3343 #endif
3344 
3345 	return 0;
3346 }
3347 
3348 static void
3349 vmxnet3_deferred_transmit(void *arg)
3350 {
3351 	struct vmxnet3_txqueue *txq = arg;
3352 	struct vmxnet3_softc *sc = txq->vxtxq_sc;
3353 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3354 
3355 	VMXNET3_TXQ_LOCK(txq);
3356 	txq->vxtxq_transmitdef.ev_count++;
3357 	if (pcq_peek(txq->vxtxq_interq) != NULL)
3358 		vmxnet3_transmit_locked(ifp, txq);
3359 	VMXNET3_TXQ_UNLOCK(txq);
3360 }
3361 
3362 static void
3363 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3364 {
3365 	struct ethercom *ec = &sc->vmx_ethercom;
3366 	struct vmxnet3_driver_shared *ds = sc->vmx_ds;
3367 	struct ether_multi *enm;
3368 	struct ether_multistep step;
3369 	u_int mode;
3370 	uint8_t *p;
3371 
3372 	VMXNET3_CORE_LOCK_ASSERT(sc);
3373 
3374 	ds->mcast_tablelen = 0;
3375 	ETHER_LOCK(ec);
3376 	CLR(ec->ec_flags, ETHER_F_ALLMULTI);
3377 	ETHER_UNLOCK(ec);
3378 
3379 	/*
3380 	 * Always accept broadcast frames.
3381 	 * Always accept frames destined to our station address.
3382 	 */
3383 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
3384 
3385 	ETHER_LOCK(ec);
3386 	if (sc->vmx_promisc ||
3387 	    ec->ec_multicnt > VMXNET3_MULTICAST_MAX)
3388 		goto allmulti;
3389 
3390 	p = sc->vmx_mcast;
3391 	ETHER_FIRST_MULTI(step, ec, enm);
3392 	while (enm != NULL) {
3393 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
3394 			/*
3395 			 * We must listen to a range of multicast addresses.
3396 			 * For now, just accept all multicasts, rather than
3397 			 * trying to set only those filter bits needed to match
3398 			 * the range.  (At this time, the only use of address
3399 			 * ranges is for IP multicast routing, for which the
3400 			 * range is big enough to require all bits set.)
3401 			 */
3402 			goto allmulti;
3403 		}
3404 		memcpy(p, enm->enm_addrlo, ETHER_ADDR_LEN);
3405 
3406 		p += ETHER_ADDR_LEN;
3407 
3408 		ETHER_NEXT_MULTI(step, enm);
3409 	}
3410 
3411 	if (ec->ec_multicnt > 0) {
3412 		SET(mode, VMXNET3_RXMODE_MCAST);
3413 		ds->mcast_tablelen = p - sc->vmx_mcast;
3414 	}
3415 	ETHER_UNLOCK(ec);
3416 
3417 	goto setit;
3418 
3419 allmulti:
3420 	SET(ec->ec_flags, ETHER_F_ALLMULTI);
3421 	ETHER_UNLOCK(ec);
3422 	SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
3423 	if (sc->vmx_promisc)
3424 		SET(mode, VMXNET3_RXMODE_PROMISC);
3425 
3426 setit:
3427 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3428 	ds->rxmode = mode;
3429 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3430 }
3431 
3432 static int
3433 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
3434 {
3435 	struct vmxnet3_softc *sc = ifp->if_softc;
3436 	struct ifreq *ifr = (struct ifreq *)data;
3437 	int s, error = 0;
3438 
3439 	switch (cmd) {
3440 	case SIOCADDMULTI:
3441 	case SIOCDELMULTI:
3442 		break;
3443 	default:
3444 		KASSERT(IFNET_LOCKED(ifp));
3445 	}
3446 
3447 	switch (cmd) {
3448 	case SIOCSIFMTU: {
3449 		int nmtu = ifr->ifr_mtu;
3450 
3451 		if (nmtu < VMXNET3_MIN_MTU || nmtu > VMXNET3_MAX_MTU) {
3452 			error = EINVAL;
3453 			break;
3454 		}
3455 		if (ifp->if_mtu != (uint64_t)nmtu) {
3456 			s = splnet();
3457 			error = ether_ioctl(ifp, cmd, data);
3458 			splx(s);
3459 			if (error == ENETRESET)
3460 				error = vmxnet3_init(ifp);
3461 		}
3462 		break;
3463 	}
3464 
3465 	default:
3466 		s = splnet();
3467 		error = ether_ioctl(ifp, cmd, data);
3468 		splx(s);
3469 	}
3470 
3471 	if (error == ENETRESET) {
3472 		VMXNET3_CORE_LOCK(sc);
3473 		if (sc->vmx_mcastactive)
3474 			vmxnet3_set_rxfilter(sc);
3475 		VMXNET3_CORE_UNLOCK(sc);
3476 		error = 0;
3477 	}
3478 
3479 	return error;
3480 }
3481 
3482 static int
3483 vmxnet3_ifflags_cb(struct ethercom *ec)
3484 {
3485 	struct ifnet *ifp = &ec->ec_if;
3486 	struct vmxnet3_softc *sc = ifp->if_softc;
3487 	int error = 0;
3488 
3489 	KASSERT(IFNET_LOCKED(ifp));
3490 
3491 	VMXNET3_CORE_LOCK(sc);
3492 	const unsigned short changed = ifp->if_flags ^ sc->vmx_if_flags;
3493 	if ((changed & ~(IFF_CANTCHANGE | IFF_DEBUG)) == 0) {
3494 		sc->vmx_if_flags = ifp->if_flags;
3495 		if (changed & IFF_PROMISC) {
3496 			sc->vmx_promisc = ifp->if_flags & IFF_PROMISC;
3497 			error = ENETRESET;
3498 		}
3499 	} else {
3500 		error = ENETRESET;
3501 	}
3502 	VMXNET3_CORE_UNLOCK(sc);
3503 
3504 	vmxnet3_if_link_status(sc);
3505 
3506 	return error;
3507 }
3508 
3509 static int
3510 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3511 {
3512 	struct vmxnet3_softc *sc;
3513 	struct vmxnet3_queue *vmxq;
3514 
3515 	sc = txq->vxtxq_sc;
3516 	vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3517 
3518 	VMXNET3_TXQ_LOCK(txq);
3519 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3520 		VMXNET3_TXQ_UNLOCK(txq);
3521 		return (0);
3522 	}
3523 	txq->vxtxq_watchdogto.ev_count++;
3524 	VMXNET3_TXQ_UNLOCK(txq);
3525 
3526 	device_printf(sc->vmx_dev, "watchdog timeout on queue %d\n",
3527 	    vmxq->vxq_id);
3528 	return (1);
3529 }
3530 
3531 static void
3532 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3533 {
3534 
3535 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3536 }
3537 
3538 static void
3539 vmxnet3_tick(void *xsc)
3540 {
3541 	struct vmxnet3_softc *sc;
3542 	int i, timedout;
3543 
3544 	sc = xsc;
3545 	timedout = 0;
3546 
3547 	VMXNET3_CORE_LOCK(sc);
3548 
3549 	vmxnet3_refresh_host_stats(sc);
3550 
3551 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3552 		timedout |= vmxnet3_watchdog(&sc->vmx_queue[i].vxq_txqueue);
3553 
3554 	if (timedout != 0) {
3555 		if (!sc->vmx_reset_pending) {
3556 			sc->vmx_reset_pending = true;
3557 			workqueue_enqueue(sc->vmx_reset_wq,
3558 			    &sc->vmx_reset_work, NULL);
3559 		}
3560 	} else {
3561 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3562 	}
3563 
3564 	VMXNET3_CORE_UNLOCK(sc);
3565 }
3566 
3567 static void
3568 vmxnet3_reset_work(struct work *work, void *arg)
3569 {
3570 	struct vmxnet3_softc *sc = arg;
3571 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3572 
3573 	VMXNET3_CORE_LOCK(sc);
3574 	KASSERT(sc->vmx_reset_pending);
3575 	sc->vmx_reset_pending = false;
3576 	VMXNET3_CORE_UNLOCK(sc);
3577 
3578 	IFNET_LOCK(ifp);
3579 	(void)vmxnet3_init(ifp);
3580 	IFNET_UNLOCK(ifp);
3581 }
3582 
3583 /*
3584  * update link state of ifnet and softc
3585  */
3586 static void
3587 vmxnet3_if_link_status(struct vmxnet3_softc *sc)
3588 {
3589 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3590 	u_int link;
3591 	bool up;
3592 
3593 	up = vmxnet3_cmd_link_status(ifp);
3594 	if (up) {
3595 		sc->vmx_link_active = 1;
3596 		link = LINK_STATE_UP;
3597 	} else {
3598 		sc->vmx_link_active = 0;
3599 		link = LINK_STATE_DOWN;
3600 	}
3601 
3602 	if_link_state_change(ifp, link);
3603 }
3604 
3605 /*
3606  * check vmx(4) state by VMXNET3_CMD and update ifp->if_baudrate
3607  *   returns
3608  *       - true:  link up
3609  *       - flase: link down
3610  */
3611 static bool
3612 vmxnet3_cmd_link_status(struct ifnet *ifp)
3613 {
3614 	struct vmxnet3_softc *sc = ifp->if_softc;
3615 	u_int x, speed;
3616 
3617 	x = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3618 	if ((x & 1) == 0)
3619 		return false;
3620 
3621 	speed = x >> 16;
3622 	ifp->if_baudrate = IF_Mbps(speed);
3623 	return true;
3624 }
3625 
3626 static void
3627 vmxnet3_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3628 {
3629 	bool up;
3630 
3631 	ifmr->ifm_status = IFM_AVALID;
3632 	ifmr->ifm_active = IFM_ETHER;
3633 
3634 	up = vmxnet3_cmd_link_status(ifp);
3635 	if (!up)
3636 		return;
3637 
3638 	ifmr->ifm_status |= IFM_ACTIVE;
3639 
3640 	if (ifp->if_baudrate >= IF_Gbps(10ULL))
3641 		ifmr->ifm_active |= IFM_10G_T;
3642 }
3643 
3644 static int
3645 vmxnet3_ifmedia_change(struct ifnet *ifp)
3646 {
3647 	return 0;
3648 }
3649 
3650 static void
3651 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3652 {
3653 	uint32_t ml, mh;
3654 
3655 	ml  = sc->vmx_lladdr[0];
3656 	ml |= sc->vmx_lladdr[1] << 8;
3657 	ml |= sc->vmx_lladdr[2] << 16;
3658 	ml |= sc->vmx_lladdr[3] << 24;
3659 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3660 
3661 	mh  = sc->vmx_lladdr[4];
3662 	mh |= sc->vmx_lladdr[5] << 8;
3663 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3664 }
3665 
3666 static void
3667 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3668 {
3669 	uint32_t ml, mh;
3670 
3671 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3672 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3673 
3674 	sc->vmx_lladdr[0] = ml;
3675 	sc->vmx_lladdr[1] = ml >> 8;
3676 	sc->vmx_lladdr[2] = ml >> 16;
3677 	sc->vmx_lladdr[3] = ml >> 24;
3678 	sc->vmx_lladdr[4] = mh;
3679 	sc->vmx_lladdr[5] = mh >> 8;
3680 }
3681 
3682 static void
3683 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3684 {
3685 	int i;
3686 
3687 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3688 	for (i = 0; i < sc->vmx_nintrs; i++)
3689 		vmxnet3_enable_intr(sc, i);
3690 }
3691 
3692 static void
3693 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3694 {
3695 	int i;
3696 
3697 	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3698 	for (i = 0; i < sc->vmx_nintrs; i++)
3699 		vmxnet3_disable_intr(sc, i);
3700 }
3701 
3702 static int
3703 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3704     struct vmxnet3_dma_alloc *dma)
3705 {
3706 	bus_dma_tag_t t = sc->vmx_dmat;
3707 	bus_dma_segment_t *segs = dma->dma_segs;
3708 	int n, error;
3709 
3710 	memset(dma, 0, sizeof(*dma));
3711 
3712 	error = bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT);
3713 	if (error) {
3714 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_alloc failed: %d\n", error);
3715 		goto fail1;
3716 	}
3717 	KASSERT(n == 1);
3718 
3719 	error = bus_dmamem_map(t, segs, 1, size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
3720 	if (error) {
3721 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_map failed: %d\n", error);
3722 		goto fail2;
3723 	}
3724 
3725 	error = bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
3726 	if (error) {
3727 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_create failed: %d\n", error);
3728 		goto fail3;
3729 	}
3730 
3731 	error = bus_dmamap_load(t, dma->dma_map, dma->dma_vaddr, size, NULL,
3732 	    BUS_DMA_NOWAIT);
3733 	if (error) {
3734 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_load failed: %d\n", error);
3735 		goto fail4;
3736 	}
3737 
3738 	memset(dma->dma_vaddr, 0, size);
3739 	dma->dma_paddr = DMAADDR(dma->dma_map);
3740 	dma->dma_size = size;
3741 
3742 	return (0);
3743 fail4:
3744 	bus_dmamap_destroy(t, dma->dma_map);
3745 fail3:
3746 	bus_dmamem_unmap(t, dma->dma_vaddr, size);
3747 fail2:
3748 	bus_dmamem_free(t, segs, 1);
3749 fail1:
3750 	return (error);
3751 }
3752 
3753 static void
3754 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3755 {
3756 	bus_dma_tag_t t = sc->vmx_dmat;
3757 
3758 	bus_dmamap_unload(t, dma->dma_map);
3759 	bus_dmamap_destroy(t, dma->dma_map);
3760 	bus_dmamem_unmap(t, dma->dma_vaddr, dma->dma_size);
3761 	bus_dmamem_free(t, dma->dma_segs, 1);
3762 
3763 	memset(dma, 0, sizeof(*dma));
3764 }
3765 
3766 MODULE(MODULE_CLASS_DRIVER, if_vmx, "pci");
3767 
3768 #ifdef _MODULE
3769 #include "ioconf.c"
3770 #endif
3771 
3772 static int
3773 if_vmx_modcmd(modcmd_t cmd, void *opaque)
3774 {
3775 	int error = 0;
3776 
3777 	switch (cmd) {
3778 	case MODULE_CMD_INIT:
3779 #ifdef _MODULE
3780 		error = config_init_component(cfdriver_ioconf_if_vmx,
3781 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3782 #endif
3783 		return error;
3784 	case MODULE_CMD_FINI:
3785 #ifdef _MODULE
3786 		error = config_fini_component(cfdriver_ioconf_if_vmx,
3787 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3788 #endif
3789 		return error;
3790 	default:
3791 		return ENOTTY;
3792 	}
3793 }
3794 
3795