xref: /netbsd-src/sys/dev/pci/if_vmx.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: if_vmx.c,v 1.5 2021/10/13 01:11:29 knakahara Exp $	*/
2 /*	$OpenBSD: if_vmx.c,v 1.16 2014/01/22 06:04:17 brad Exp $	*/
3 
4 /*
5  * Copyright (c) 2013 Tsubai Masanari
6  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __KERNEL_RCSID(0, "$NetBSD: if_vmx.c,v 1.5 2021/10/13 01:11:29 knakahara Exp $");
23 
24 #include <sys/param.h>
25 #include <sys/cpu.h>
26 #include <sys/kernel.h>
27 #include <sys/kmem.h>
28 #include <sys/bitops.h>
29 #include <sys/bus.h>
30 #include <sys/device.h>
31 #include <sys/mbuf.h>
32 #include <sys/module.h>
33 #include <sys/sockio.h>
34 #include <sys/pcq.h>
35 #include <sys/workqueue.h>
36 #include <sys/interrupt.h>
37 
38 #include <net/bpf.h>
39 #include <net/if.h>
40 #include <net/if_ether.h>
41 #include <net/if_media.h>
42 
43 #include <netinet/if_inarp.h>
44 #include <netinet/in_systm.h>	/* for <netinet/ip.h> */
45 #include <netinet/in.h>		/* for <netinet/ip.h> */
46 #include <netinet/ip.h>		/* for struct ip */
47 #include <netinet/ip6.h>	/* for struct ip6_hdr */
48 #include <netinet/tcp.h>	/* for struct tcphdr */
49 #include <netinet/udp.h>	/* for struct udphdr */
50 
51 #include <dev/pci/pcivar.h>
52 #include <dev/pci/pcireg.h>
53 #include <dev/pci/pcidevs.h>
54 
55 #include <dev/pci/if_vmxreg.h>
56 
57 #define VMXNET3_DRIVER_VERSION 0x00010000
58 
59 /*
60  * Max descriptors per Tx packet. We must limit the size of any
61  * TSO packet based on the number of segments.
62  */
63 #define VMXNET3_TX_MAXSEGS		32
64 #define VMXNET3_TX_MAXSIZE		(VMXNET3_TX_MAXSEGS * MCLBYTES)
65 
66 /*
67  * Maximum supported Tx segment size. The length field in the
68  * Tx descriptor is 14 bits.
69  */
70 #define VMXNET3_TX_MAXSEGSIZE		(1 << 14)
71 
72 /*
73  * The maximum number of Rx segments we accept.
74  */
75 #define VMXNET3_MAX_RX_SEGS		0	/* no segments */
76 
77 /*
78  * Predetermined size of the multicast MAC filter table. If the
79  * number of multicast addresses exceeds this size, then the
80  * ALL_MULTI mode is used instead.
81  */
82 #define VMXNET3_MULTICAST_MAX		32
83 
84 /*
85  * Our Tx watchdog timeout.
86  */
87 #define VMXNET3_WATCHDOG_TIMEOUT	5
88 
89 /*
90  * Default values for vmx_{rx,tx}_intr_process_limit, the maximum
91  * number of packets to process in the interrupt handler.
92  */
93 #define VMXNET3_RX_INTR_PROCESS_LIMIT 0U
94 #define VMXNET3_TX_INTR_PROCESS_LIMIT 256
95 
96 /*
97  * Default values for vmx_{rx,tx}_process_limit, the maximum number
98  * of packets to process during deferred processing.
99  */
100 #define VMXNET3_RX_PROCESS_LIMIT 256
101 #define VMXNET3_TX_PROCESS_LIMIT 256
102 
103 #define VMXNET3_WORKQUEUE_PRI PRI_SOFTNET
104 
105 /*
106  * IP protocols that we can perform Tx checksum offloading of.
107  */
108 #define VMXNET3_CSUM_OFFLOAD \
109     (M_CSUM_TCPv4 | M_CSUM_UDPv4)
110 #define VMXNET3_CSUM_OFFLOAD_IPV6 \
111     (M_CSUM_TCPv6 | M_CSUM_UDPv6)
112 
113 #define VMXNET3_CSUM_ALL_OFFLOAD \
114     (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)
115 
116 #define VMXNET3_RXRINGS_PERQ 2
117 
118 #define VMXNET3_CORE_LOCK(_sc)		mutex_enter((_sc)->vmx_mtx)
119 #define VMXNET3_CORE_UNLOCK(_sc)	mutex_exit((_sc)->vmx_mtx)
120 #define VMXNET3_CORE_LOCK_ASSERT(_sc)	mutex_owned((_sc)->vmx_mtx)
121 
122 #define VMXNET3_RXQ_LOCK(_rxq)		mutex_enter((_rxq)->vxrxq_mtx)
123 #define VMXNET3_RXQ_UNLOCK(_rxq)	mutex_exit((_rxq)->vxrxq_mtx)
124 #define VMXNET3_RXQ_LOCK_ASSERT(_rxq)		\
125     mutex_owned((_rxq)->vxrxq_mtx)
126 
127 #define VMXNET3_TXQ_LOCK(_txq)		mutex_enter((_txq)->vxtxq_mtx)
128 #define VMXNET3_TXQ_TRYLOCK(_txq)	mutex_tryenter((_txq)->vxtxq_mtx)
129 #define VMXNET3_TXQ_UNLOCK(_txq)	mutex_exit((_txq)->vxtxq_mtx)
130 #define VMXNET3_TXQ_LOCK_ASSERT(_txq)		\
131     mutex_owned((_txq)->vxtxq_mtx)
132 
133 struct vmxnet3_dma_alloc {
134 	bus_addr_t dma_paddr;
135 	void *dma_vaddr;
136 	bus_dmamap_t dma_map;
137 	bus_size_t dma_size;
138 	bus_dma_segment_t dma_segs[1];
139 };
140 
141 struct vmxnet3_txbuf {
142 	bus_dmamap_t vtxb_dmamap;
143 	struct mbuf *vtxb_m;
144 };
145 
146 struct vmxnet3_txring {
147 	struct vmxnet3_txbuf *vxtxr_txbuf;
148 	struct vmxnet3_txdesc *vxtxr_txd;
149 	u_int vxtxr_head;
150 	u_int vxtxr_next;
151 	u_int vxtxr_ndesc;
152 	int vxtxr_gen;
153 	struct vmxnet3_dma_alloc vxtxr_dma;
154 };
155 
156 struct vmxnet3_rxbuf {
157 	bus_dmamap_t vrxb_dmamap;
158 	struct mbuf *vrxb_m;
159 };
160 
161 struct vmxnet3_rxring {
162 	struct vmxnet3_rxbuf *vxrxr_rxbuf;
163 	struct vmxnet3_rxdesc *vxrxr_rxd;
164 	u_int vxrxr_fill;
165 	u_int vxrxr_ndesc;
166 	int vxrxr_gen;
167 	int vxrxr_rid;
168 	struct vmxnet3_dma_alloc vxrxr_dma;
169 	bus_dmamap_t vxrxr_spare_dmap;
170 };
171 
172 struct vmxnet3_comp_ring {
173 	union {
174 		struct vmxnet3_txcompdesc *txcd;
175 		struct vmxnet3_rxcompdesc *rxcd;
176 	} vxcr_u;
177 	u_int vxcr_next;
178 	u_int vxcr_ndesc;
179 	int vxcr_gen;
180 	struct vmxnet3_dma_alloc vxcr_dma;
181 };
182 
183 struct vmxnet3_txq_stats {
184 #if 0
185 	uint64_t vmtxs_opackets;	/* if_opackets */
186 	uint64_t vmtxs_obytes;		/* if_obytes */
187 	uint64_t vmtxs_omcasts;		/* if_omcasts */
188 #endif
189 	uint64_t vmtxs_csum;
190 	uint64_t vmtxs_tso;
191 	uint64_t vmtxs_full;
192 	uint64_t vmtxs_offload_failed;
193 };
194 
195 struct vmxnet3_txqueue {
196 	kmutex_t *vxtxq_mtx;
197 	struct vmxnet3_softc *vxtxq_sc;
198 	int vxtxq_watchdog;
199 	pcq_t *vxtxq_interq;
200 	struct vmxnet3_txring vxtxq_cmd_ring;
201 	struct vmxnet3_comp_ring vxtxq_comp_ring;
202 	struct vmxnet3_txq_stats vxtxq_stats;
203 	struct vmxnet3_txq_shared *vxtxq_ts;
204 	char vxtxq_name[16];
205 
206 	void *vxtxq_si;
207 
208 	struct evcnt vxtxq_intr;
209 	struct evcnt vxtxq_defer;
210 	struct evcnt vxtxq_deferreq;
211 	struct evcnt vxtxq_pcqdrop;
212 	struct evcnt vxtxq_transmitdef;
213 	struct evcnt vxtxq_watchdogto;
214 	struct evcnt vxtxq_defragged;
215 	struct evcnt vxtxq_defrag_failed;
216 };
217 
218 #if 0
219 struct vmxnet3_rxq_stats {
220 	uint64_t vmrxs_ipackets;	/* if_ipackets */
221 	uint64_t vmrxs_ibytes;		/* if_ibytes */
222 	uint64_t vmrxs_iqdrops;		/* if_iqdrops */
223 	uint64_t vmrxs_ierrors;		/* if_ierrors */
224 };
225 #endif
226 
227 struct vmxnet3_rxqueue {
228 	kmutex_t *vxrxq_mtx;
229 	struct vmxnet3_softc *vxrxq_sc;
230 	struct mbuf *vxrxq_mhead;
231 	struct mbuf *vxrxq_mtail;
232 	struct vmxnet3_rxring vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ];
233 	struct vmxnet3_comp_ring vxrxq_comp_ring;
234 #if 0
235 	struct vmxnet3_rxq_stats vxrxq_stats;
236 #endif
237 	struct vmxnet3_rxq_shared *vxrxq_rs;
238 	char vxrxq_name[16];
239 
240 	struct evcnt vxrxq_intr;
241 	struct evcnt vxrxq_defer;
242 	struct evcnt vxrxq_deferreq;
243 	struct evcnt vxrxq_mgetcl_failed;
244 	struct evcnt vxrxq_mbuf_load_failed;
245 };
246 
247 struct vmxnet3_queue {
248 	int vxq_id;
249 	int vxq_intr_idx;
250 
251 	struct vmxnet3_txqueue vxq_txqueue;
252 	struct vmxnet3_rxqueue vxq_rxqueue;
253 
254 	void *vxq_si;
255 	bool vxq_workqueue;
256 	bool vxq_wq_enqueued;
257 	struct work vxq_wq_cookie;
258 };
259 
260 struct vmxnet3_softc {
261 	device_t vmx_dev;
262 	struct ethercom vmx_ethercom;
263 	struct ifmedia vmx_media;
264 	struct vmxnet3_driver_shared *vmx_ds;
265 	int vmx_flags;
266 #define VMXNET3_FLAG_NO_MSIX	(1 << 0)
267 #define VMXNET3_FLAG_RSS	(1 << 1)
268 #define VMXNET3_FLAG_ATTACHED	(1 << 2)
269 
270 	struct vmxnet3_queue *vmx_queue;
271 
272 	struct pci_attach_args *vmx_pa;
273 	pci_chipset_tag_t vmx_pc;
274 
275 	bus_space_tag_t vmx_iot0;
276 	bus_space_tag_t vmx_iot1;
277 	bus_space_handle_t vmx_ioh0;
278 	bus_space_handle_t vmx_ioh1;
279 	bus_size_t vmx_ios0;
280 	bus_size_t vmx_ios1;
281 	bus_dma_tag_t vmx_dmat;
282 
283 	int vmx_link_active;
284 	int vmx_ntxqueues;
285 	int vmx_nrxqueues;
286 	int vmx_ntxdescs;
287 	int vmx_nrxdescs;
288 	int vmx_max_rxsegs;
289 
290 	struct evcnt vmx_event_intr;
291 	struct evcnt vmx_event_link;
292 	struct evcnt vmx_event_txqerror;
293 	struct evcnt vmx_event_rxqerror;
294 	struct evcnt vmx_event_dic;
295 	struct evcnt vmx_event_debug;
296 
297 	int vmx_intr_type;
298 	int vmx_intr_mask_mode;
299 	int vmx_event_intr_idx;
300 	int vmx_nintrs;
301 	pci_intr_handle_t *vmx_intrs;	/* legacy use vmx_intrs[0] */
302 	void *vmx_ihs[VMXNET3_MAX_INTRS];
303 
304 	kmutex_t *vmx_mtx;
305 
306 	uint8_t *vmx_mcast;
307 	void *vmx_qs;
308 	struct vmxnet3_rss_shared *vmx_rss;
309 	callout_t vmx_tick;
310 	struct vmxnet3_dma_alloc vmx_ds_dma;
311 	struct vmxnet3_dma_alloc vmx_qs_dma;
312 	struct vmxnet3_dma_alloc vmx_mcast_dma;
313 	struct vmxnet3_dma_alloc vmx_rss_dma;
314 	int vmx_max_ntxqueues;
315 	int vmx_max_nrxqueues;
316 	uint8_t vmx_lladdr[ETHER_ADDR_LEN];
317 
318 	u_int vmx_rx_intr_process_limit;
319 	u_int vmx_tx_intr_process_limit;
320 	u_int vmx_rx_process_limit;
321 	u_int vmx_tx_process_limit;
322 	struct sysctllog *vmx_sysctllog;
323 
324 	bool vmx_txrx_workqueue;
325 	struct workqueue *vmx_queue_wq;
326 };
327 
328 #define VMXNET3_STAT
329 
330 #ifdef VMXNET3_STAT
331 struct {
332 	u_int txhead;
333 	u_int txdone;
334 	u_int maxtxlen;
335 	u_int rxdone;
336 	u_int rxfill;
337 	u_int intr;
338 } vmxstat;
339 #endif
340 
341 typedef enum {
342 	VMXNET3_BARRIER_RD,
343 	VMXNET3_BARRIER_WR,
344 	VMXNET3_BARRIER_RDWR,
345 } vmxnet3_barrier_t;
346 
347 #define JUMBO_LEN (MCLBYTES - ETHER_ALIGN)	/* XXX */
348 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
349 
350 #define vtophys(va) 0		/* XXX ok? */
351 
352 static int vmxnet3_match(device_t, cfdata_t, void *);
353 static void vmxnet3_attach(device_t, device_t, void *);
354 static int vmxnet3_detach(device_t, int);
355 
356 static int vmxnet3_alloc_pci_resources(struct vmxnet3_softc *);
357 static void vmxnet3_free_pci_resources(struct vmxnet3_softc *);
358 static int vmxnet3_check_version(struct vmxnet3_softc *);
359 static void vmxnet3_check_multiqueue(struct vmxnet3_softc *);
360 
361 static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
362 static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
363 static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
364 static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
365 static void vmxnet3_free_interrupts(struct vmxnet3_softc *);
366 
367 static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
368 static int vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *);
369 static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
370 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
371 static int vmxnet3_setup_interrupts(struct vmxnet3_softc *);
372 static int vmxnet3_setup_sysctl(struct vmxnet3_softc *);
373 
374 static int vmxnet3_setup_stats(struct vmxnet3_softc *);
375 static void vmxnet3_teardown_stats(struct vmxnet3_softc *);
376 
377 static int vmxnet3_init_rxq(struct vmxnet3_softc *, int);
378 static int vmxnet3_init_txq(struct vmxnet3_softc *, int);
379 static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
380 static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
381 static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
382 static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
383 
384 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
385 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
386 static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
387 static void vmxnet3_free_txq_data(struct vmxnet3_softc *);
388 static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
389 static void vmxnet3_free_rxq_data(struct vmxnet3_softc *);
390 static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
391 static void vmxnet3_free_queue_data(struct vmxnet3_softc *);
392 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
393 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
394 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
395 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
396 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
397 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
398 static void vmxnet3_free_data(struct vmxnet3_softc *);
399 static int vmxnet3_setup_interface(struct vmxnet3_softc *);
400 
401 static void vmxnet3_evintr(struct vmxnet3_softc *);
402 static bool vmxnet3_txq_eof(struct vmxnet3_txqueue *, u_int);
403 static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxqueue *,
404     struct vmxnet3_rxring *);
405 static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
406     struct vmxnet3_rxring *, int);
407 static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *);
408 static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
409 static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *,
410     struct vmxnet3_rxcompdesc *, struct mbuf *);
411 static bool vmxnet3_rxq_eof(struct vmxnet3_rxqueue *, u_int);
412 static int vmxnet3_legacy_intr(void *);
413 static int vmxnet3_txrxq_intr(void *);
414 static void vmxnet3_handle_queue(void *);
415 static void vmxnet3_handle_queue_work(struct work *, void *);
416 static int vmxnet3_event_intr(void *);
417 
418 static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
419 static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
420 static void vmxnet3_stop_locked(struct vmxnet3_softc *);
421 static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *);
422 static void vmxnet3_stop(struct ifnet *, int);
423 
424 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
425 static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
426 static int vmxnet3_reinit_queues(struct vmxnet3_softc *);
427 static int vmxnet3_enable_device(struct vmxnet3_softc *);
428 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
429 static int vmxnet3_reinit(struct vmxnet3_softc *);
430 
431 static int vmxnet3_init_locked(struct vmxnet3_softc *);
432 static int vmxnet3_init(struct ifnet *);
433 
434 static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *, int *, int *);
435 static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t);
436 static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
437 static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
438 static void vmxnet3_start_locked(struct ifnet *);
439 static void vmxnet3_start(struct ifnet *);
440 static void vmxnet3_transmit_locked(struct ifnet *, struct vmxnet3_txqueue *);
441 static int vmxnet3_transmit(struct ifnet *, struct mbuf *);
442 static void vmxnet3_deferred_transmit(void *);
443 
444 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *);
445 static int vmxnet3_ioctl(struct ifnet *, u_long, void *);
446 static int vmxnet3_ifflags_cb(struct ethercom *);
447 
448 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
449 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
450 static void vmxnet3_tick(void *);
451 static void vmxnet3_if_link_status(struct vmxnet3_softc *);
452 static bool vmxnet3_cmd_link_status(struct ifnet *);
453 static void vmxnet3_ifmedia_status(struct ifnet *, struct ifmediareq *);
454 static int vmxnet3_ifmedia_change(struct ifnet *);
455 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
456 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
457 
458 static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
459 static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
460 
461 static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t,
462     struct vmxnet3_dma_alloc *);
463 static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *);
464 
465 CFATTACH_DECL3_NEW(vmx, sizeof(struct vmxnet3_softc),
466     vmxnet3_match, vmxnet3_attach, vmxnet3_detach, NULL, NULL, NULL, 0);
467 
468 /* round down to the nearest power of 2 */
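/* e.g. 6 -> 4 and 8 -> 8; n <= 0 falls back to a single-entry queue */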
469 static int
470 vmxnet3_calc_queue_size(int n)
471 {
472 
473 	if (__predict_false(n <= 0))
474 		return 1;
475 
476 	return (1U << (fls32(n) - 1));
477 }
478 
479 static inline void
480 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
481 {
482 
483 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
484 }
485 
486 static inline uint32_t
487 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
488 {
489 
490 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
491 }
492 
493 static inline void
494 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
495 {
496 
497 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
498 }
499 
500 static inline void
501 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
502 {
503 
504 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
505 }
506 
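/*
 * Issue a command by writing it to the BAR1 command register and read
 * the result back from the same register.
 */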
507 static inline uint32_t
508 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
509 {
510 
511 	vmxnet3_write_cmd(sc, cmd);
512 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
513 }
514 
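/*
 * Per-vector interrupt mask registers: writing 0 unmasks (enables) the
 * vector, writing 1 masks (disables) it.
 */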
515 static inline void
516 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
517 {
518 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
519 }
520 
521 static inline void
522 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
523 {
524 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
525 }
526 
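/*
 * Advance the Rx fill index, flipping the ring's generation bit each
 * time the index wraps around.
 */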
527 static inline void
528 vmxnet3_rxr_increment_fill(struct vmxnet3_rxring *rxr)
529 {
530 
531 	if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
532 		rxr->vxrxr_fill = 0;
533 		rxr->vxrxr_gen ^= 1;
534 	}
535 }
536 
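/*
 * Free descriptors between the producer (head) and consumer (next)
 * indices; one slot is kept unused so a full ring can be told apart
 * from an empty one.
 */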
537 static inline int
538 vmxnet3_txring_avail(struct vmxnet3_txring *txr)
539 {
540 	int avail = txr->vxtxr_next - txr->vxtxr_head - 1;
541 	return (avail < 0 ? (int)txr->vxtxr_ndesc + avail : avail);
542 }
543 
544 /*
545  * Since this is a purely paravirtualized device, we do not have
546  * to worry about DMA coherency. But at times, we must make sure
547  * both the compiler and CPU do not reorder memory operations.
548  */
549 static inline void
550 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
551 {
552 
553 	switch (type) {
554 	case VMXNET3_BARRIER_RD:
555 		membar_consumer();
556 		break;
557 	case VMXNET3_BARRIER_WR:
558 		membar_producer();
559 		break;
560 	case VMXNET3_BARRIER_RDWR:
561 		membar_sync();
562 		break;
563 	default:
564 		panic("%s: bad barrier type %d", __func__, type);
565 	}
566 }
567 
568 static int
569 vmxnet3_match(device_t parent, cfdata_t match, void *aux)
570 {
571 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
572 
573 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_VMWARE &&
574 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VMWARE_VMXNET3)
575 		return 1;
576 
577 	return 0;
578 }
579 
580 static void
581 vmxnet3_attach(device_t parent, device_t self, void *aux)
582 {
583 	struct vmxnet3_softc *sc = device_private(self);
584 	struct pci_attach_args *pa = aux;
585 	pcireg_t preg;
586 	int error;
587 	int candidate;
588 
589 	sc->vmx_dev = self;
590 	sc->vmx_pa = pa;
591 	sc->vmx_pc = pa->pa_pc;
592 	if (pci_dma64_available(pa))
593 		sc->vmx_dmat = pa->pa_dmat64;
594 	else
595 		sc->vmx_dmat = pa->pa_dmat;
596 
597 	pci_aprint_devinfo_fancy(pa, "Ethernet controller", "vmxnet3", 1);
598 
599 	preg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
600 	preg |= PCI_COMMAND_MASTER_ENABLE;
601 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, preg);
602 
603 	sc->vmx_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
604 	callout_init(&sc->vmx_tick, CALLOUT_MPSAFE);
605 
606 	candidate = MIN(MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES),
607 	    ncpu);
608 	sc->vmx_max_ntxqueues = sc->vmx_max_nrxqueues =
609 	    vmxnet3_calc_queue_size(candidate);
610 	sc->vmx_ntxdescs = 512;
611 	sc->vmx_nrxdescs = 256;
612 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
613 
614 	error = vmxnet3_alloc_pci_resources(sc);
615 	if (error)
616 		return;
617 
618 	error = vmxnet3_check_version(sc);
619 	if (error)
620 		return;
621 
622 	error = vmxnet3_alloc_rxtx_queues(sc);
623 	if (error)
624 		return;
625 
626 	error = vmxnet3_alloc_interrupts(sc);
627 	if (error)
628 		return;
629 
630 	vmxnet3_check_multiqueue(sc);
631 
632 	error = vmxnet3_alloc_data(sc);
633 	if (error)
634 		return;
635 
636 	error = vmxnet3_setup_interface(sc);
637 	if (error)
638 		return;
639 
640 	error = vmxnet3_setup_interrupts(sc);
641 	if (error)
642 		return;
643 
644 	error = vmxnet3_setup_sysctl(sc);
645 	if (error)
646 		return;
647 
648 	error = vmxnet3_setup_stats(sc);
649 	if (error)
650 		return;
651 
652 	sc->vmx_flags |= VMXNET3_FLAG_ATTACHED;
653 }
654 
655 static int
656 vmxnet3_detach(device_t self, int flags)
657 {
658 	struct vmxnet3_softc *sc;
659 	struct ifnet *ifp;
660 
661 	sc = device_private(self);
662 	ifp = &sc->vmx_ethercom.ec_if;
663 
664 	if (sc->vmx_flags & VMXNET3_FLAG_ATTACHED) {
665 		VMXNET3_CORE_LOCK(sc);
666 		vmxnet3_stop_locked(sc);
667 		callout_halt(&sc->vmx_tick, sc->vmx_mtx);
668 		callout_destroy(&sc->vmx_tick);
669 		VMXNET3_CORE_UNLOCK(sc);
670 
671 		ether_ifdetach(ifp);
672 		if_detach(ifp);
673 		ifmedia_fini(&sc->vmx_media);
674 	}
675 
676 	vmxnet3_teardown_stats(sc);
677 	sysctl_teardown(&sc->vmx_sysctllog);
678 
679 	vmxnet3_free_interrupts(sc);
680 
681 	vmxnet3_free_data(sc);
682 	vmxnet3_free_pci_resources(sc);
683 	vmxnet3_free_rxtx_queues(sc);
684 
685 	if (sc->vmx_mtx)
686 		mutex_obj_free(sc->vmx_mtx);
687 
688 	return (0);
689 }
690 
691 static int
692 vmxnet3_alloc_pci_resources(struct vmxnet3_softc *sc)
693 {
694 	struct pci_attach_args *pa = sc->vmx_pa;
695 	pcireg_t memtype;
696 
697 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
698 	if (pci_mapreg_map(pa, PCI_BAR(0), memtype, 0, &sc->vmx_iot0, &sc->vmx_ioh0,
699 	    NULL, &sc->vmx_ios0)) {
700 		aprint_error_dev(sc->vmx_dev, "failed to map BAR0\n");
701 		return (ENXIO);
702 	}
703 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(1));
704 	if (pci_mapreg_map(pa, PCI_BAR(1), memtype, 0, &sc->vmx_iot1, &sc->vmx_ioh1,
705 	    NULL, &sc->vmx_ios1)) {
706 		aprint_error_dev(sc->vmx_dev, "failed to map BAR1\n");
707 		return (ENXIO);
708 	}
709 
710 	if (!pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, NULL, NULL)) {
711 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
712 		return (0);
713 	}
714 
715 	return (0);
716 }
717 
718 static void
719 vmxnet3_free_pci_resources(struct vmxnet3_softc *sc)
720 {
721 
722 	if (sc->vmx_ios0) {
723 		bus_space_unmap(sc->vmx_iot0, sc->vmx_ioh0, sc->vmx_ios0);
724 		sc->vmx_ios0 = 0;
725 	}
726 
727 	if (sc->vmx_ios1) {
728 		bus_space_unmap(sc->vmx_iot1, sc->vmx_ioh1, sc->vmx_ios1);
729 		sc->vmx_ios1 = 0;
730 	}
731 }
732 
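/*
 * Check that the device advertises revision 1 of the vmxnet3 interface
 * and of UPT, and select those revisions by writing the bit back.
 */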
733 static int
734 vmxnet3_check_version(struct vmxnet3_softc *sc)
735 {
736 	u_int ver;
737 
738 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
739 	if ((ver & 0x1) == 0) {
740 		aprint_error_dev(sc->vmx_dev,
741 		    "unsupported hardware version 0x%x\n", ver);
742 		return (ENOTSUP);
743 	}
744 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
745 
746 	ver = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
747 	if ((ver & 0x1) == 0) {
748 		aprint_error_dev(sc->vmx_dev,
749 		    "incompatible UPT version 0x%x\n", ver);
750 		return (ENOTSUP);
751 	}
752 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
753 
754 	return (0);
755 }
756 
757 static void
758 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
759 {
760 
761 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
762 		goto out;
763 
764 	/* Just use the maximum configured for now. */
765 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
766 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
767 
768 	if (sc->vmx_nrxqueues > 1)
769 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
770 
771 	return;
772 
773 out:
774 	sc->vmx_ntxqueues = 1;
775 	sc->vmx_nrxqueues = 1;
776 }
777 
778 static int
779 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
780 {
781 	int required;
782 	struct pci_attach_args *pa = sc->vmx_pa;
783 
784 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
785 		return (1);
786 
787 	/* Allocate an additional vector for the event interrupt. */
788 	required = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues) + 1;
789 
790 	if (pci_msix_count(pa->pa_pc, pa->pa_tag) < required)
791 		return (1);
792 
793 	if (pci_msix_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
794 		sc->vmx_nintrs = required;
795 		return (0);
796 	}
797 
798 	return (1);
799 }
800 
801 static int
802 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
803 {
804 	int nmsi, required;
805 	struct pci_attach_args *pa = sc->vmx_pa;
806 
807 	required = 1;
808 
809 	nmsi = pci_msi_count(pa->pa_pc, pa->pa_tag);
810 	if (nmsi < required)
811 		return (1);
812 
813 	if (pci_msi_alloc_exact(pa, &sc->vmx_intrs, required) == 0) {
814 		sc->vmx_nintrs = required;
815 		return (0);
816 	}
817 
818 	return (1);
819 }
820 
821 static int
822 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
823 {
824 
825 	if (pci_intx_alloc(sc->vmx_pa, &sc->vmx_intrs) == 0) {
826 		sc->vmx_nintrs = 1;
827 		return (0);
828 	}
829 
830 	return (1);
831 }
832 
833 static int
834 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
835 {
836 	u_int config;
837 	int error;
838 
839 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
840 
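	/*
	 * The low two bits of the interrupt configuration select the
	 * interrupt type (auto, MSI-X, MSI or legacy); the next two bits
	 * select the mask mode.
	 */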
841 	sc->vmx_intr_type = config & 0x03;
842 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
843 
844 	switch (sc->vmx_intr_type) {
845 	case VMXNET3_IT_AUTO:
846 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
847 		/* FALLTHROUGH */
848 	case VMXNET3_IT_MSIX:
849 		error = vmxnet3_alloc_msix_interrupts(sc);
850 		if (error == 0)
851 			break;
852 		sc->vmx_intr_type = VMXNET3_IT_MSI;
853 		/* FALLTHROUGH */
854 	case VMXNET3_IT_MSI:
855 		error = vmxnet3_alloc_msi_interrupts(sc);
856 		if (error == 0)
857 			break;
858 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
859 		/* FALLTHROUGH */
860 	case VMXNET3_IT_LEGACY:
861 		error = vmxnet3_alloc_legacy_interrupts(sc);
862 		if (error == 0)
863 			break;
864 		/* FALLTHROUGH */
865 	default:
866 		sc->vmx_intr_type = -1;
867 		aprint_error_dev(sc->vmx_dev, "cannot allocate any interrupt resources\n");
868 		return (ENXIO);
869 	}
870 
871 	return (error);
872 }
873 
874 static void
875 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
876 {
877 	pci_chipset_tag_t pc = sc->vmx_pc;
878 	int i;
879 
880 	workqueue_destroy(sc->vmx_queue_wq);
881 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
882 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
883 
884 		softint_disestablish(vmxq->vxq_si);
885 		vmxq->vxq_si = NULL;
886 	}
887 	for (i = 0; i < sc->vmx_nintrs; i++) {
888 		pci_intr_disestablish(pc, sc->vmx_ihs[i]);
889 	}
890 	pci_intr_release(pc, sc->vmx_intrs, sc->vmx_nintrs);
891 }
892 
893 static int
894 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
895 {
896 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
897 	struct vmxnet3_queue *vmxq;
898 	pci_intr_handle_t *intr;
899 	void **ihs;
900 	int intr_idx, i, use_queues, error;
901 	kcpuset_t *affinity;
902 	const char *intrstr;
903 	char intrbuf[PCI_INTRSTR_LEN];
904 	char xnamebuf[32];
905 
906 	intr = sc->vmx_intrs;
907 	intr_idx = 0;
908 	ihs = sc->vmx_ihs;
909 
910 	/* See vmxnet3_alloc_msix_interrupts() */
911 	use_queues = MIN(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
912 	for (i = 0; i < use_queues; i++, intr++, ihs++, intr_idx++) {
913 		snprintf(xnamebuf, 32, "%s: txrx %d", device_xname(sc->vmx_dev), i);
914 
915 		vmxq = &sc->vmx_queue[i];
916 
917 		intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
918 
919 		pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
920 		*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
921 		    vmxnet3_txrxq_intr, vmxq, xnamebuf);
922 		if (*ihs == NULL) {
923 			aprint_error_dev(sc->vmx_dev,
924 			    "unable to establish txrx interrupt at %s\n", intrstr);
925 			return (-1);
926 		}
927 		aprint_normal_dev(sc->vmx_dev, "txrx interrupting at %s\n", intrstr);
928 
929 		kcpuset_create(&affinity, true);
930 		kcpuset_set(affinity, intr_idx % ncpu);
931 		error = interrupt_distribute(*ihs, affinity, NULL);
932 		if (error) {
933 			aprint_normal_dev(sc->vmx_dev,
934 			    "cannot change interrupt affinity for %s, using default CPU\n",
935 			    intrstr);
936 		}
937 		kcpuset_destroy(affinity);
938 
939 		vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
940 		    vmxnet3_handle_queue, vmxq);
941 		if (vmxq->vxq_si == NULL) {
942 			aprint_error_dev(sc->vmx_dev,
943 			    "softint_establish for vxq_si failed\n");
944 			return (-1);
945 		}
946 
947 		vmxq->vxq_intr_idx = intr_idx;
948 	}
949 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(sc->vmx_dev));
950 	error = workqueue_create(&sc->vmx_queue_wq, xnamebuf,
951 	    vmxnet3_handle_queue_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_NET,
952 	    WQ_PERCPU | WQ_MPSAFE);
953 	if (error) {
954 		aprint_error_dev(sc->vmx_dev, "workqueue_create failed\n");
955 		return (-1);
956 	}
957 	sc->vmx_txrx_workqueue = false;
958 
959 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
960 
961 	snprintf(xnamebuf, 32, "%s: link", device_xname(sc->vmx_dev));
962 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
963 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
964 	    vmxnet3_event_intr, sc, xnamebuf);
965 	if (*ihs == NULL) {
966 		aprint_error_dev(sc->vmx_dev,
967 		    "unable to establish event interrupt at %s\n", intrstr);
968 		return (-1);
969 	}
970 	aprint_normal_dev(sc->vmx_dev, "event interrupting at %s\n", intrstr);
971 
972 	sc->vmx_event_intr_idx = intr_idx;
973 
974 	return (0);
975 }
976 
977 static int
978 vmxnet3_setup_msi_interrupt(struct vmxnet3_softc *sc)
979 {
980 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
981 	pci_intr_handle_t *intr;
982 	void **ihs;
983 	struct vmxnet3_queue *vmxq;
984 	int i;
985 	const char *intrstr;
986 	char intrbuf[PCI_INTRSTR_LEN];
987 	char xnamebuf[32];
988 
989 	intr = &sc->vmx_intrs[0];
990 	ihs = sc->vmx_ihs;
991 	vmxq = &sc->vmx_queue[0];
992 
993 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
994 
995 	snprintf(xnamebuf, 32, "%s: msi", device_xname(sc->vmx_dev));
996 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
997 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
998 	    vmxnet3_legacy_intr, sc, xnamebuf);
999 	if (*ihs == NULL) {
1000 		aprint_error_dev(sc->vmx_dev,
1001 		    "unable to establish interrupt at %s\n", intrstr);
1002 		return (-1);
1003 	}
1004 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1005 
1006 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1007 	    vmxnet3_handle_queue, vmxq);
1008 	if (vmxq->vxq_si == NULL) {
1009 		aprint_error_dev(sc->vmx_dev,
1010 		    "softint_establish for vxq_si failed\n");
1011 		return (-1);
1012 	}
1013 
1014 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
1015 		sc->vmx_queue[i].vxq_intr_idx = 0;
1016 	sc->vmx_event_intr_idx = 0;
1017 
1018 	return (0);
1019 }
1020 
1021 static int
1022 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
1023 {
1024 	pci_chipset_tag_t pc = sc->vmx_pa->pa_pc;
1025 	pci_intr_handle_t *intr;
1026 	void **ihs;
1027 	struct vmxnet3_queue *vmxq;
1028 	int i;
1029 	const char *intrstr;
1030 	char intrbuf[PCI_INTRSTR_LEN];
1031 	char xnamebuf[32];
1032 
1033 	intr = &sc->vmx_intrs[0];
1034 	ihs = sc->vmx_ihs;
1035 	vmxq = &sc->vmx_queue[0];
1036 
1037 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1038 
1039 	snprintf(xnamebuf, 32, "%s:legacy", device_xname(sc->vmx_dev));
1040 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1041 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1042 	    vmxnet3_legacy_intr, sc, xnamebuf);
1043 	if (*ihs == NULL) {
1044 		aprint_error_dev(sc->vmx_dev,
1045 		    "unable to establish interrupt at %s\n", intrstr);
1046 		return (-1);
1047 	}
1048 	aprint_normal_dev(sc->vmx_dev, "interrupting at %s\n", intrstr);
1049 
1050 	vmxq->vxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1051 	    vmxnet3_handle_queue, vmxq);
1052 	if (vmxq->vxq_si == NULL) {
1053 		aprint_error_dev(sc->vmx_dev,
1054 		    "softint_establish for vxq_si failed\n");
1055 		return (-1);
1056 	}
1057 
1058 	for (i = 0; i < MIN(sc->vmx_ntxqueues, sc->vmx_nrxqueues); i++)
1059 		sc->vmx_queue[i].vxq_intr_idx = 0;
1060 	sc->vmx_event_intr_idx = 0;
1061 
1062 	return (0);
1063 }
1064 
1065 static void
1066 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
1067 {
1068 	struct vmxnet3_queue *vmxq;
1069 	struct vmxnet3_txqueue *txq;
1070 	struct vmxnet3_txq_shared *txs;
1071 	struct vmxnet3_rxqueue *rxq;
1072 	struct vmxnet3_rxq_shared *rxs;
1073 	int i;
1074 
1075 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
1076 
1077 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1078 		vmxq = &sc->vmx_queue[i];
1079 		txq = &vmxq->vxq_txqueue;
1080 		txs = txq->vxtxq_ts;
1081 		txs->intr_idx = vmxq->vxq_intr_idx;
1082 	}
1083 
1084 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1085 		vmxq = &sc->vmx_queue[i];
1086 		rxq = &vmxq->vxq_rxqueue;
1087 		rxs = rxq->vxrxq_rs;
1088 		rxs->intr_idx = vmxq->vxq_intr_idx;
1089 	}
1090 }
1091 
1092 static int
1093 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
1094 {
1095 	int error;
1096 
1097 	switch (sc->vmx_intr_type) {
1098 	case VMXNET3_IT_MSIX:
1099 		error = vmxnet3_setup_msix_interrupts(sc);
1100 		break;
1101 	case VMXNET3_IT_MSI:
1102 		error = vmxnet3_setup_msi_interrupt(sc);
1103 		break;
1104 	case VMXNET3_IT_LEGACY:
1105 		error = vmxnet3_setup_legacy_interrupt(sc);
1106 		break;
1107 	default:
1108 		panic("%s: invalid interrupt type %d", __func__,
1109 		    sc->vmx_intr_type);
1110 	}
1111 
1112 	if (error == 0)
1113 		vmxnet3_set_interrupt_idx(sc);
1114 
1115 	return (error);
1116 }
1117 
1118 static int
1119 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1120 {
1121 	struct vmxnet3_rxqueue *rxq;
1122 	struct vmxnet3_rxring *rxr;
1123 	int i;
1124 
1125 	rxq = &sc->vmx_queue[q].vxq_rxqueue;
1126 
1127 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1128 	    device_xname(sc->vmx_dev), q);
1129 	rxq->vxrxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1130 
1131 	rxq->vxrxq_sc = sc;
1132 
1133 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1134 		rxr = &rxq->vxrxq_cmd_ring[i];
1135 		rxr->vxrxr_rid = i;
1136 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1137 		rxr->vxrxr_rxbuf = kmem_zalloc(rxr->vxrxr_ndesc *
1138 		    sizeof(struct vmxnet3_rxbuf), KM_SLEEP);
1139 
1140 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1141 	}
1142 
1143 	return (0);
1144 }
1145 
1146 static int
1147 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1148 {
1149 	struct vmxnet3_txqueue *txq;
1150 	struct vmxnet3_txring *txr;
1151 
1152 	txq = &sc->vmx_queue[q].vxq_txqueue;
1153 	txr = &txq->vxtxq_cmd_ring;
1154 
1155 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1156 	    device_xname(sc->vmx_dev), q);
1157 	txq->vxtxq_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET /* XXX */);
1158 
1159 	txq->vxtxq_sc = sc;
1160 
1161 	txq->vxtxq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1162 	    vmxnet3_deferred_transmit, txq);
1163 	if (txq->vxtxq_si == NULL) {
1164 		mutex_obj_free(txq->vxtxq_mtx);
1165 		aprint_error_dev(sc->vmx_dev,
1166 		    "softint_establish for vxtxq_si failed\n");
1167 		return ENOMEM;
1168 	}
1169 
1170 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1171 	txr->vxtxr_txbuf = kmem_zalloc(txr->vxtxr_ndesc *
1172 	    sizeof(struct vmxnet3_txbuf), KM_SLEEP);
1173 
1174 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1175 
1176 	txq->vxtxq_interq = pcq_create(sc->vmx_ntxdescs, KM_SLEEP);
1177 
1178 	return (0);
1179 }
1180 
1181 static int
1182 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1183 {
1184 	int i, error, max_nqueues;
1185 
1186 	KASSERT(!cpu_intr_p());
1187 	KASSERT(!cpu_softintr_p());
1188 
1189 	/*
1190 	 * Only attempt to create multiple queues if MSIX is available.
1191 	 * This check prevents us from allocating queue structures that
1192 	 * we will not use.
1193 	 *
1194 	 * FreeBSD:
1195 	 * MSIX is disabled by default because it's apparently broken for
1196 	 * devices passed through by at least ESXi 5.1.
1197 	 * The hw.pci.honor_msi_blacklist tunable must be set to zero for MSIX.
1198 	 */
1199 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1200 		sc->vmx_max_nrxqueues = 1;
1201 		sc->vmx_max_ntxqueues = 1;
1202 	}
1203 
1204 	max_nqueues = MAX(sc->vmx_max_ntxqueues, sc->vmx_max_nrxqueues);
1205 	sc->vmx_queue = kmem_zalloc(sizeof(struct vmxnet3_queue) * max_nqueues,
1206 	    KM_SLEEP);
1207 
1208 	for (i = 0; i < max_nqueues; i++) {
1209 		struct vmxnet3_queue *vmxq = &sc->vmx_queue[i];
1210 		vmxq->vxq_id = i;
1211 	}
1212 
1213 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1214 		error = vmxnet3_init_rxq(sc, i);
1215 		if (error)
1216 			return (error);
1217 	}
1218 
1219 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1220 		error = vmxnet3_init_txq(sc, i);
1221 		if (error)
1222 			return (error);
1223 	}
1224 
1225 	return (0);
1226 }
1227 
1228 static void
1229 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1230 {
1231 	struct vmxnet3_rxring *rxr;
1232 	int i;
1233 
1234 	rxq->vxrxq_sc = NULL;
1235 
1236 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1237 		rxr = &rxq->vxrxq_cmd_ring[i];
1238 
1239 		if (rxr->vxrxr_rxbuf != NULL) {
1240 			kmem_free(rxr->vxrxr_rxbuf,
1241 			    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf));
1242 			rxr->vxrxr_rxbuf = NULL;
1243 		}
1244 	}
1245 
1246 	if (rxq->vxrxq_mtx != NULL)
1247 		mutex_obj_free(rxq->vxrxq_mtx);
1248 }
1249 
1250 static void
1251 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1252 {
1253 	struct vmxnet3_txring *txr;
1254 	struct mbuf *m;
1255 
1256 	txr = &txq->vxtxq_cmd_ring;
1257 
1258 	txq->vxtxq_sc = NULL;
1259 
1260 	softint_disestablish(txq->vxtxq_si);
1261 
1262 	while ((m = pcq_get(txq->vxtxq_interq)) != NULL)
1263 		m_freem(m);
1264 	pcq_destroy(txq->vxtxq_interq);
1265 
1266 	if (txr->vxtxr_txbuf != NULL) {
1267 		kmem_free(txr->vxtxr_txbuf,
1268 		    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf));
1269 		txr->vxtxr_txbuf = NULL;
1270 	}
1271 
1272 	if (txq->vxtxq_mtx != NULL)
1273 		mutex_obj_free(txq->vxtxq_mtx);
1274 }
1275 
1276 static void
1277 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1278 {
1279 	int i;
1280 
1281 	if (sc->vmx_queue != NULL) {
1282 		int max_nqueues;
1283 
1284 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1285 			vmxnet3_destroy_rxq(&sc->vmx_queue[i].vxq_rxqueue);
1286 
1287 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1288 			vmxnet3_destroy_txq(&sc->vmx_queue[i].vxq_txqueue);
1289 
1290 		max_nqueues = MAX(sc->vmx_max_nrxqueues, sc->vmx_max_ntxqueues);
1291 		kmem_free(sc->vmx_queue,
1292 		    sizeof(struct vmxnet3_queue) * max_nqueues);
1293 	}
1294 }
1295 
1296 static int
1297 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1298 {
1299 	device_t dev;
1300 	uint8_t *kva;
1301 	size_t size;
1302 	int i, error;
1303 
1304 	dev = sc->vmx_dev;
1305 
1306 	size = sizeof(struct vmxnet3_driver_shared);
1307 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1308 	if (error) {
1309 		device_printf(dev, "cannot alloc shared memory\n");
1310 		return (error);
1311 	}
1312 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1313 
1314 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1315 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1316 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1317 	if (error) {
1318 		device_printf(dev, "cannot alloc queue shared memory\n");
1319 		return (error);
1320 	}
1321 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1322 	kva = sc->vmx_qs;
1323 
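	/*
	 * Carve the per-queue shared areas out of the single DMA
	 * allocation: Tx queues first, then Rx queues.
	 */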
1324 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1325 		sc->vmx_queue[i].vxq_txqueue.vxtxq_ts =
1326 		    (struct vmxnet3_txq_shared *) kva;
1327 		kva += sizeof(struct vmxnet3_txq_shared);
1328 	}
1329 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1330 		sc->vmx_queue[i].vxq_rxqueue.vxrxq_rs =
1331 		    (struct vmxnet3_rxq_shared *) kva;
1332 		kva += sizeof(struct vmxnet3_rxq_shared);
1333 	}
1334 
1335 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1336 		size = sizeof(struct vmxnet3_rss_shared);
1337 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1338 		if (error) {
1339 			device_printf(dev, "cannot alloc rss shared memory\n");
1340 			return (error);
1341 		}
1342 		sc->vmx_rss =
1343 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1344 	}
1345 
1346 	return (0);
1347 }
1348 
1349 static void
1350 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1351 {
1352 
1353 	if (sc->vmx_rss != NULL) {
1354 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1355 		sc->vmx_rss = NULL;
1356 	}
1357 
1358 	if (sc->vmx_qs != NULL) {
1359 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1360 		sc->vmx_qs = NULL;
1361 	}
1362 
1363 	if (sc->vmx_ds != NULL) {
1364 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1365 		sc->vmx_ds = NULL;
1366 	}
1367 }
1368 
1369 static int
1370 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1371 {
1372 	device_t dev;
1373 	struct vmxnet3_txqueue *txq;
1374 	struct vmxnet3_txring *txr;
1375 	struct vmxnet3_comp_ring *txc;
1376 	size_t descsz, compsz;
1377 	u_int i;
1378 	int q, error;
1379 
1380 	dev = sc->vmx_dev;
1381 
1382 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1383 		txq = &sc->vmx_queue[q].vxq_txqueue;
1384 		txr = &txq->vxtxq_cmd_ring;
1385 		txc = &txq->vxtxq_comp_ring;
1386 
1387 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1388 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1389 
1390 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1391 		if (error) {
1392 			device_printf(dev, "cannot alloc Tx descriptors for "
1393 			    "queue %d error %d\n", q, error);
1394 			return (error);
1395 		}
1396 		txr->vxtxr_txd =
1397 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1398 
1399 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1400 		if (error) {
1401 			device_printf(dev, "cannot alloc Tx comp descriptors "
1402 			   "for queue %d error %d\n", q, error);
1403 			return (error);
1404 		}
1405 		txc->vxcr_u.txcd =
1406 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1407 
1408 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1409 			error = bus_dmamap_create(sc->vmx_dmat, VMXNET3_TX_MAXSIZE,
1410 			    VMXNET3_TX_MAXSEGS, VMXNET3_TX_MAXSEGSIZE, 0, BUS_DMA_NOWAIT,
1411 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1412 			if (error) {
1413 				device_printf(dev, "unable to create Tx buf "
1414 				    "dmamap for queue %d idx %d\n", q, i);
1415 				return (error);
1416 			}
1417 		}
1418 	}
1419 
1420 	return (0);
1421 }
1422 
1423 static void
1424 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1425 {
1426 	struct vmxnet3_txqueue *txq;
1427 	struct vmxnet3_txring *txr;
1428 	struct vmxnet3_comp_ring *txc;
1429 	struct vmxnet3_txbuf *txb;
1430 	u_int i;
1431 	int q;
1432 
1433 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1434 		txq = &sc->vmx_queue[q].vxq_txqueue;
1435 		txr = &txq->vxtxq_cmd_ring;
1436 		txc = &txq->vxtxq_comp_ring;
1437 
1438 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1439 			txb = &txr->vxtxr_txbuf[i];
1440 			if (txb->vtxb_dmamap != NULL) {
1441 				bus_dmamap_destroy(sc->vmx_dmat,
1442 				    txb->vtxb_dmamap);
1443 				txb->vtxb_dmamap = NULL;
1444 			}
1445 		}
1446 
1447 		if (txc->vxcr_u.txcd != NULL) {
1448 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1449 			txc->vxcr_u.txcd = NULL;
1450 		}
1451 
1452 		if (txr->vxtxr_txd != NULL) {
1453 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1454 			txr->vxtxr_txd = NULL;
1455 		}
1456 	}
1457 }
1458 
1459 static int
1460 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1461 {
1462 	device_t dev;
1463 	struct vmxnet3_rxqueue *rxq;
1464 	struct vmxnet3_rxring *rxr;
1465 	struct vmxnet3_comp_ring *rxc;
1466 	int descsz, compsz;
1467 	u_int i, j;
1468 	int q, error;
1469 
1470 	dev = sc->vmx_dev;
1471 
1472 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1473 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1474 		rxc = &rxq->vxrxq_comp_ring;
1475 		compsz = 0;
1476 
1477 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1478 			rxr = &rxq->vxrxq_cmd_ring[i];
1479 
1480 			descsz = rxr->vxrxr_ndesc *
1481 			    sizeof(struct vmxnet3_rxdesc);
1482 			compsz += rxr->vxrxr_ndesc *
1483 			    sizeof(struct vmxnet3_rxcompdesc);
1484 
1485 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1486 			    &rxr->vxrxr_dma);
1487 			if (error) {
1488 				device_printf(dev, "cannot allocate Rx "
1489 				    "descriptors for queue %d/%d error %d\n",
1490 				    i, q, error);
1491 				return (error);
1492 			}
1493 			rxr->vxrxr_rxd =
1494 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1495 		}
1496 
1497 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1498 		if (error) {
1499 			device_printf(dev, "cannot alloc Rx comp descriptors "
1500 			    "for queue %d error %d\n", q, error);
1501 			return (error);
1502 		}
1503 		rxc->vxcr_u.rxcd =
1504 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1505 
1506 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1507 			rxr = &rxq->vxrxq_cmd_ring[i];
1508 
1509 			error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1510 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1511 			    &rxr->vxrxr_spare_dmap);
1512 			if (error) {
1513 				device_printf(dev, "unable to create spare "
1514 				    "dmamap for queue %d/%d error %d\n",
1515 				    q, i, error);
1516 				return (error);
1517 			}
1518 
1519 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1520 				error = bus_dmamap_create(sc->vmx_dmat, JUMBO_LEN, 1,
1521 				    JUMBO_LEN, 0, BUS_DMA_NOWAIT,
1522 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1523 				if (error) {
1524 					device_printf(dev, "unable to create "
1525 					    "dmamap for queue %d/%d slot %d "
1526 					    "error %d\n",
1527 					    q, i, j, error);
1528 					return (error);
1529 				}
1530 			}
1531 		}
1532 	}
1533 
1534 	return (0);
1535 }
1536 
1537 static void
1538 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1539 {
1540 	struct vmxnet3_rxqueue *rxq;
1541 	struct vmxnet3_rxring *rxr;
1542 	struct vmxnet3_comp_ring *rxc;
1543 	struct vmxnet3_rxbuf *rxb;
1544 	u_int i, j;
1545 	int q;
1546 
1547 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1548 		rxq = &sc->vmx_queue[q].vxq_rxqueue;
1549 		rxc = &rxq->vxrxq_comp_ring;
1550 
1551 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1552 			rxr = &rxq->vxrxq_cmd_ring[i];
1553 
1554 			if (rxr->vxrxr_spare_dmap != NULL) {
1555 				bus_dmamap_destroy(sc->vmx_dmat,
1556 				    rxr->vxrxr_spare_dmap);
1557 				rxr->vxrxr_spare_dmap = NULL;
1558 			}
1559 
1560 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1561 				rxb = &rxr->vxrxr_rxbuf[j];
1562 				if (rxb->vrxb_dmamap != NULL) {
1563 					bus_dmamap_destroy(sc->vmx_dmat,
1564 					    rxb->vrxb_dmamap);
1565 					rxb->vrxb_dmamap = NULL;
1566 				}
1567 			}
1568 		}
1569 
1570 		if (rxc->vxcr_u.rxcd != NULL) {
1571 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1572 			rxc->vxcr_u.rxcd = NULL;
1573 		}
1574 
1575 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1576 			rxr = &rxq->vxrxq_cmd_ring[i];
1577 
1578 			if (rxr->vxrxr_rxd != NULL) {
1579 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1580 				rxr->vxrxr_rxd = NULL;
1581 			}
1582 		}
1583 	}
1584 }
1585 
1586 static int
1587 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1588 {
1589 	int error;
1590 
1591 	error = vmxnet3_alloc_txq_data(sc);
1592 	if (error)
1593 		return (error);
1594 
1595 	error = vmxnet3_alloc_rxq_data(sc);
1596 	if (error)
1597 		return (error);
1598 
1599 	return (0);
1600 }
1601 
1602 static void
1603 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1604 {
1605 
1606 	if (sc->vmx_queue != NULL) {
1607 		vmxnet3_free_rxq_data(sc);
1608 		vmxnet3_free_txq_data(sc);
1609 	}
1610 }
1611 
1612 static int
1613 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1614 {
1615 	int error;
1616 
1617 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1618 	    32, &sc->vmx_mcast_dma);
1619 	if (error)
1620 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1621 	else
1622 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1623 
1624 	return (error);
1625 }
1626 
1627 static void
1628 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1629 {
1630 
1631 	if (sc->vmx_mcast != NULL) {
1632 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1633 		sc->vmx_mcast = NULL;
1634 	}
1635 }
1636 
1637 static void
1638 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1639 {
1640 	struct vmxnet3_driver_shared *ds;
1641 	struct vmxnet3_txqueue *txq;
1642 	struct vmxnet3_txq_shared *txs;
1643 	struct vmxnet3_rxqueue *rxq;
1644 	struct vmxnet3_rxq_shared *rxs;
1645 	int i;
1646 
1647 	ds = sc->vmx_ds;
1648 
1649 	/*
1650 	 * Initialize fields of the shared data that remain the same across
1651 	 * reinits. Note the shared data is zero'd when allocated.
1652 	 */
1653 
1654 	ds->magic = VMXNET3_REV1_MAGIC;
1655 
1656 	/* DriverInfo */
1657 	ds->version = VMXNET3_DRIVER_VERSION;
1658 	ds->guest = VMXNET3_GOS_FREEBSD |
1659 #ifdef __LP64__
1660 	    VMXNET3_GOS_64BIT;
1661 #else
1662 	    VMXNET3_GOS_32BIT;
1663 #endif
1664 	ds->vmxnet3_revision = 1;
1665 	ds->upt_version = 1;
1666 
1667 	/* Misc. conf */
1668 	ds->driver_data = vtophys(sc);
1669 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1670 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1671 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1672 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1673 
1674 	/* RSS conf */
1675 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1676 		ds->rss.version = 1;
1677 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1678 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1679 	}
1680 
1681 	/* Interrupt control. */
1682 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1683 	ds->nintr = sc->vmx_nintrs;
1684 	ds->evintr = sc->vmx_event_intr_idx;
1685 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1686 
1687 	for (i = 0; i < sc->vmx_nintrs; i++)
1688 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1689 
1690 	/* Receive filter. */
1691 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1692 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1693 
1694 	/* Tx queues */
1695 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1696 		txq = &sc->vmx_queue[i].vxq_txqueue;
1697 		txs = txq->vxtxq_ts;
1698 
1699 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1700 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1701 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1702 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1703 		txs->driver_data = vtophys(txq);
1704 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1705 	}
1706 
1707 	/* Rx queues */
1708 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1709 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
1710 		rxs = rxq->vxrxq_rs;
1711 
1712 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1713 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1714 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1715 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1716 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1717 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1718 		rxs->driver_data = vtophys(rxq);
1719 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1720 	}
1721 }
1722 
1723 static void
1724 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1725 {
1726 	/*
1727 	 * Use the same key as the Linux driver until FreeBSD can do
1728 	 * RSS (presumably Toeplitz) in software.
1729 	 */
1730 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1731 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1732 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1733 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1734 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1735 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1736 	};
1737 
1738 	struct vmxnet3_rss_shared *rss;
1739 	int i;
1740 
1741 	rss = sc->vmx_rss;
1742 
1743 	rss->hash_type =
1744 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1745 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1746 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1747 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1748 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1749 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1750 
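	/* Spread the RSS indirection table entries across the Rx queues round-robin. */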
1751 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1752 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1753 }
1754 
1755 static void
1756 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1757 {
1758 	struct ifnet *ifp;
1759 	struct vmxnet3_driver_shared *ds;
1760 
1761 	ifp = &sc->vmx_ethercom.ec_if;
1762 	ds = sc->vmx_ds;
1763 
1764 	ds->mtu = ifp->if_mtu;
1765 	ds->ntxqueue = sc->vmx_ntxqueues;
1766 	ds->nrxqueue = sc->vmx_nrxqueues;
1767 
1768 	ds->upt_features = 0;
1769 	if (ifp->if_capenable &
1770 	    (IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
1771 	    IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
1772 		ds->upt_features |= UPT1_F_CSUM;
1773 	if (sc->vmx_ethercom.ec_capenable & ETHERCAP_VLAN_HWTAGGING)
1774 		ds->upt_features |= UPT1_F_VLAN;
1775 
1776 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1777 		ds->upt_features |= UPT1_F_RSS;
1778 		vmxnet3_reinit_rss_shared_data(sc);
1779 	}
1780 
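	/* Pass the physical address of the shared data to the device in two 32-bit halves. */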
1781 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1782 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1783 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1784 }
1785 
1786 static int
1787 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1788 {
1789 	int error;
1790 
1791 	error = vmxnet3_alloc_shared_data(sc);
1792 	if (error)
1793 		return (error);
1794 
1795 	error = vmxnet3_alloc_queue_data(sc);
1796 	if (error)
1797 		return (error);
1798 
1799 	error = vmxnet3_alloc_mcast_table(sc);
1800 	if (error)
1801 		return (error);
1802 
1803 	vmxnet3_init_shared_data(sc);
1804 
1805 	return (0);
1806 }
1807 
1808 static void
1809 vmxnet3_free_data(struct vmxnet3_softc *sc)
1810 {
1811 
1812 	vmxnet3_free_mcast_table(sc);
1813 	vmxnet3_free_queue_data(sc);
1814 	vmxnet3_free_shared_data(sc);
1815 }
1816 
1817 static int
1818 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1819 {
1820 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
1821 
1822 	vmxnet3_get_lladdr(sc);
1823 	aprint_normal_dev(sc->vmx_dev, "Ethernet address %s\n",
1824 	    ether_sprintf(sc->vmx_lladdr));
1825 	vmxnet3_set_lladdr(sc);
1826 
1827 	strlcpy(ifp->if_xname, device_xname(sc->vmx_dev), IFNAMSIZ);
1828 	ifp->if_softc = sc;
1829 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
1830 	ifp->if_extflags = IFEF_MPSAFE;
1831 	ifp->if_ioctl = vmxnet3_ioctl;
1832 	ifp->if_start = vmxnet3_start;
1833 	ifp->if_transmit = vmxnet3_transmit;
1834 	ifp->if_watchdog = NULL;
1835 	ifp->if_init = vmxnet3_init;
1836 	ifp->if_stop = vmxnet3_stop;
1837 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_CSUM_IPv4_Rx |
1838 		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1839 		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1840 		    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1841 		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1842 
1843 	ifp->if_capenable = ifp->if_capabilities;
1844 
1845 	sc->vmx_ethercom.ec_if.if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1846 
1847 	sc->vmx_ethercom.ec_capabilities |=
1848 	    ETHERCAP_VLAN_MTU | ETHERCAP_VLAN_HWTAGGING | ETHERCAP_JUMBO_MTU;
1849 	sc->vmx_ethercom.ec_capenable |= ETHERCAP_VLAN_HWTAGGING;
1850 
1851 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs);
1852 	IFQ_SET_READY(&ifp->if_snd);
1853 
1854 	/* Initialize ifmedia structures. */
1855 	sc->vmx_ethercom.ec_ifmedia = &sc->vmx_media;
1856 	ifmedia_init_with_lock(&sc->vmx_media, IFM_IMASK, vmxnet3_ifmedia_change,
1857 	    vmxnet3_ifmedia_status, sc->vmx_mtx);
1858 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1859 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
1860 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_10G_T, 0, NULL);
1861 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1862 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_1000_T, 0, NULL);
1863 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1864 
1865 	if_attach(ifp);
1866 	if_deferred_start_init(ifp, NULL);
1867 	ether_ifattach(ifp, sc->vmx_lladdr);
1868 	ether_set_ifflags_cb(&sc->vmx_ethercom, vmxnet3_ifflags_cb);
1869 	vmxnet3_cmd_link_status(ifp);
1870 
1871 	/* Should be set before setting up interrupts. */
1872 	sc->vmx_rx_intr_process_limit = VMXNET3_RX_INTR_PROCESS_LIMIT;
1873 	sc->vmx_rx_process_limit = VMXNET3_RX_PROCESS_LIMIT;
1874 	sc->vmx_tx_intr_process_limit = VMXNET3_TX_INTR_PROCESS_LIMIT;
1875 	sc->vmx_tx_process_limit = VMXNET3_TX_PROCESS_LIMIT;
1876 
1877 	return (0);
1878 }
1879 
1880 static int
1881 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
1882 {
1883 	const char *devname;
1884 	struct sysctllog **log;
1885 	const struct sysctlnode *rnode, *rxnode, *txnode;
1886 	int error;
1887 
1888 	log = &sc->vmx_sysctllog;
1889 	devname = device_xname(sc->vmx_dev);
1890 
1891 	error = sysctl_createv(log, 0, NULL, &rnode,
1892 	    0, CTLTYPE_NODE, devname,
1893 	    SYSCTL_DESCR("vmxnet3 information and settings"),
1894 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
1895 	if (error)
1896 		goto out;
1897 	error = sysctl_createv(log, 0, &rnode, NULL,
1898 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
1899 	    SYSCTL_DESCR("Use workqueue for packet processing"),
1900 	    NULL, 0, &sc->vmx_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
1901 	if (error)
1902 		goto out;
1903 
1904 	error = sysctl_createv(log, 0, &rnode, &rxnode,
1905 	    0, CTLTYPE_NODE, "rx",
1906 	    SYSCTL_DESCR("vmxnet3 information and settings for Rx"),
1907 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1908 	if (error)
1909 		goto out;
1910 	error = sysctl_createv(log, 0, &rxnode, NULL,
1911 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1912 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
1913 	    NULL, 0, &sc->vmx_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1914 	if (error)
1915 		goto out;
1916 	error = sysctl_createv(log, 0, &rxnode, NULL,
1917 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1918 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1919 	    NULL, 0, &sc->vmx_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1920 	if (error)
1921 		goto out;
1922 
1923 	error = sysctl_createv(log, 0, &rnode, &txnode,
1924 	    0, CTLTYPE_NODE, "tx",
1925 	    SYSCTL_DESCR("vmxnet3 information and settings for Tx"),
1926 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1927 	if (error)
1928 		goto out;
1929 	error = sysctl_createv(log, 0, &txnode, NULL,
1930 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1931 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1932 	    NULL, 0, &sc->vmx_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1933 	if (error)
1934 		goto out;
1935 	error = sysctl_createv(log, 0, &txnode, NULL,
1936 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1937 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1938 	    NULL, 0, &sc->vmx_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1939 
1940 out:
1941 	if (error) {
1942 		aprint_error_dev(sc->vmx_dev,
1943 		    "unable to create sysctl node\n");
1944 		sysctl_teardown(log);
1945 	}
1946 	return error;
1947 }
1948 
1949 static int
1950 vmxnet3_setup_stats(struct vmxnet3_softc *sc)
1951 {
1952 	struct vmxnet3_queue *vmxq;
1953 	struct vmxnet3_txqueue *txq;
1954 	struct vmxnet3_rxqueue *rxq;
1955 	int i;
1956 
1957 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1958 		vmxq = &sc->vmx_queue[i];
1959 		txq = &vmxq->vxq_txqueue;
1960 		evcnt_attach_dynamic(&txq->vxtxq_intr, EVCNT_TYPE_INTR,
1961 		    NULL, txq->vxtxq_name, "Interrupt on queue");
1962 		evcnt_attach_dynamic(&txq->vxtxq_defer, EVCNT_TYPE_MISC,
1963 		    NULL, txq->vxtxq_name, "Handled queue in softint/workqueue");
1964 		evcnt_attach_dynamic(&txq->vxtxq_deferreq, EVCNT_TYPE_MISC,
1965 		    NULL, txq->vxtxq_name, "Requested in softint/workqueue");
1966 		evcnt_attach_dynamic(&txq->vxtxq_pcqdrop, EVCNT_TYPE_MISC,
1967 		    NULL, txq->vxtxq_name, "Dropped in pcq");
1968 		evcnt_attach_dynamic(&txq->vxtxq_transmitdef, EVCNT_TYPE_MISC,
1969 		    NULL, txq->vxtxq_name, "Deferred transmit");
1970 		evcnt_attach_dynamic(&txq->vxtxq_watchdogto, EVCNT_TYPE_MISC,
1971 		    NULL, txq->vxtxq_name, "Watchdog timeout");
1972 		evcnt_attach_dynamic(&txq->vxtxq_defragged, EVCNT_TYPE_MISC,
1973 		    NULL, txq->vxtxq_name, "m_defrag succeeded");
1974 		evcnt_attach_dynamic(&txq->vxtxq_defrag_failed, EVCNT_TYPE_MISC,
1975 		    NULL, txq->vxtxq_name, "m_defrag failed");
1976 	}
1977 
1978 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1979 		vmxq = &sc->vmx_queue[i];
1980 		rxq = &vmxq->vxq_rxqueue;
1981 		evcnt_attach_dynamic(&rxq->vxrxq_intr, EVCNT_TYPE_INTR,
1982 		    NULL, rxq->vxrxq_name, "Interrupt on queue");
1983 		evcnt_attach_dynamic(&rxq->vxrxq_defer, EVCNT_TYPE_MISC,
1984 		    NULL, rxq->vxrxq_name, "Handled queue in softint/workqueue");
1985 		evcnt_attach_dynamic(&rxq->vxrxq_deferreq, EVCNT_TYPE_MISC,
1986 		    NULL, rxq->vxrxq_name, "Requested in softint/workqueue");
1987 		evcnt_attach_dynamic(&rxq->vxrxq_mgetcl_failed, EVCNT_TYPE_MISC,
1988 		    NULL, rxq->vxrxq_name, "MCLGET failed");
1989 		evcnt_attach_dynamic(&rxq->vxrxq_mbuf_load_failed, EVCNT_TYPE_MISC,
1990 		    NULL, rxq->vxrxq_name, "bus_dmamap_load_mbuf failed");
1991 	}
1992 
1993 	evcnt_attach_dynamic(&sc->vmx_event_intr, EVCNT_TYPE_INTR,
1994 	    NULL, device_xname(sc->vmx_dev), "Interrupt for other events");
1995 	evcnt_attach_dynamic(&sc->vmx_event_link, EVCNT_TYPE_MISC,
1996 	    NULL, device_xname(sc->vmx_dev), "Link status event");
1997 	evcnt_attach_dynamic(&sc->vmx_event_txqerror, EVCNT_TYPE_MISC,
1998 	    NULL, device_xname(sc->vmx_dev), "Tx queue error event");
1999 	evcnt_attach_dynamic(&sc->vmx_event_rxqerror, EVCNT_TYPE_MISC,
2000 	    NULL, device_xname(sc->vmx_dev), "Rx queue error event");
2001 	evcnt_attach_dynamic(&sc->vmx_event_dic, EVCNT_TYPE_MISC,
2002 	    NULL, device_xname(sc->vmx_dev), "Device impl change event");
2003 	evcnt_attach_dynamic(&sc->vmx_event_debug, EVCNT_TYPE_MISC,
2004 	    NULL, device_xname(sc->vmx_dev), "Debug event");
2005 
2006 	return 0;
2007 }
2008 
2009 static void
2010 vmxnet3_teardown_stats(struct vmxnet3_softc *sc)
2011 {
2012 	struct vmxnet3_queue *vmxq;
2013 	struct vmxnet3_txqueue *txq;
2014 	struct vmxnet3_rxqueue *rxq;
2015 	int i;
2016 
2017 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2018 		vmxq = &sc->vmx_queue[i];
2019 		txq = &vmxq->vxq_txqueue;
2020 		evcnt_detach(&txq->vxtxq_intr);
2021 		evcnt_detach(&txq->vxtxq_defer);
2022 		evcnt_detach(&txq->vxtxq_deferreq);
2023 		evcnt_detach(&txq->vxtxq_pcqdrop);
2024 		evcnt_detach(&txq->vxtxq_transmitdef);
2025 		evcnt_detach(&txq->vxtxq_watchdogto);
2026 		evcnt_detach(&txq->vxtxq_defragged);
2027 		evcnt_detach(&txq->vxtxq_defrag_failed);
2028 	}
2029 
2030 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2031 		vmxq = &sc->vmx_queue[i];
2032 		rxq = &vmxq->vxq_rxqueue;
2033 		evcnt_detach(&rxq->vxrxq_intr);
2034 		evcnt_detach(&rxq->vxrxq_defer);
2035 		evcnt_detach(&rxq->vxrxq_deferreq);
2036 		evcnt_detach(&rxq->vxrxq_mgetcl_failed);
2037 		evcnt_detach(&rxq->vxrxq_mbuf_load_failed);
2038 	}
2039 
2040 	evcnt_detach(&sc->vmx_event_intr);
2041 	evcnt_detach(&sc->vmx_event_link);
2042 	evcnt_detach(&sc->vmx_event_txqerror);
2043 	evcnt_detach(&sc->vmx_event_rxqerror);
2044 	evcnt_detach(&sc->vmx_event_dic);
2045 	evcnt_detach(&sc->vmx_event_debug);
2046 }
2047 
2048 static void
2049 vmxnet3_evintr(struct vmxnet3_softc *sc)
2050 {
2051 	device_t dev;
2052 	struct vmxnet3_txq_shared *ts;
2053 	struct vmxnet3_rxq_shared *rs;
2054 	uint32_t event;
2055 	int reset;
2056 
2057 	dev = sc->vmx_dev;
2058 	reset = 0;
2059 
2060 	VMXNET3_CORE_LOCK(sc);
2061 
2062 	/* Clear events. */
2063 	event = sc->vmx_ds->event;
2064 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
2065 
2066 	if (event & VMXNET3_EVENT_LINK) {
2067 		sc->vmx_event_link.ev_count++;
2068 		vmxnet3_if_link_status(sc);
2069 		if (sc->vmx_link_active != 0)
2070 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2071 	}
2072 
2073 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
2074 		if (event & VMXNET3_EVENT_TQERROR)
2075 			sc->vmx_event_txqerror.ev_count++;
2076 		if (event & VMXNET3_EVENT_RQERROR)
2077 			sc->vmx_event_rxqerror.ev_count++;
2078 
2079 		reset = 1;
2080 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
2081 		ts = sc->vmx_queue[0].vxq_txqueue.vxtxq_ts;
2082 		if (ts->stopped != 0)
2083 			device_printf(dev, "Tx queue error %#x\n", ts->error);
2084 		rs = sc->vmx_queue[0].vxq_rxqueue.vxrxq_rs;
2085 		if (rs->stopped != 0)
2086 			device_printf(dev, "Rx queue error %#x\n", rs->error);
2087 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
2088 	}
2089 
2090 	if (event & VMXNET3_EVENT_DIC) {
2091 		sc->vmx_event_dic.ev_count++;
2092 		device_printf(dev, "device implementation change event\n");
2093 	}
2094 	if (event & VMXNET3_EVENT_DEBUG) {
2095 		sc->vmx_event_debug.ev_count++;
2096 		device_printf(dev, "debug event\n");
2097 	}
2098 
2099 	if (reset != 0)
2100 		vmxnet3_init_locked(sc);
2101 
2102 	VMXNET3_CORE_UNLOCK(sc);
2103 }
2104 
2105 static bool
2106 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq, u_int limit)
2107 {
2108 	struct vmxnet3_softc *sc;
2109 	struct vmxnet3_txring *txr;
2110 	struct vmxnet3_comp_ring *txc;
2111 	struct vmxnet3_txcompdesc *txcd;
2112 	struct vmxnet3_txbuf *txb;
2113 	struct ifnet *ifp;
2114 	struct mbuf *m;
2115 	u_int sop;
2116 	bool more = false;
2117 
2118 	sc = txq->vxtxq_sc;
2119 	txr = &txq->vxtxq_cmd_ring;
2120 	txc = &txq->vxtxq_comp_ring;
2121 	ifp = &sc->vmx_ethercom.ec_if;
2122 
2123 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2124 
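	/*
	 * Walk the Tx completion ring.  A completion descriptor is valid only
	 * while its generation bit matches the ring's current generation; the
	 * expected generation flips each time the index wraps, so entries
	 * left over from the previous lap are ignored.
	 */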
2125 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2126 	for (;;) {
2127 		if (limit-- == 0) {
2128 			more = true;
2129 			break;
2130 		}
2131 
2132 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
2133 		if (txcd->gen != txc->vxcr_gen)
2134 			break;
2135 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2136 
2137 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
2138 			txc->vxcr_next = 0;
2139 			txc->vxcr_gen ^= 1;
2140 		}
2141 
2142 		sop = txr->vxtxr_next;
2143 		txb = &txr->vxtxr_txbuf[sop];
2144 
2145 		if ((m = txb->vtxb_m) != NULL) {
2146 			bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2147 			    0, txb->vtxb_dmamap->dm_mapsize,
2148 			    BUS_DMASYNC_POSTWRITE);
2149 			bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2150 
2151 			if_statinc_ref(nsr, if_opackets);
2152 			if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
2153 			if (m->m_flags & M_MCAST)
2154 				if_statinc_ref(nsr, if_omcasts);
2155 
2156 			m_freem(m);
2157 			txb->vtxb_m = NULL;
2158 		}
2159 
2160 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
2161 	}
2162 	IF_STAT_PUTREF(ifp);
2163 
2164 	if (txr->vxtxr_head == txr->vxtxr_next)
2165 		txq->vxtxq_watchdog = 0;
2166 
2167 	return more;
2168 }
2169 
2170 static int
2171 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
2172     struct vmxnet3_rxring *rxr)
2173 {
2174 	struct mbuf *m;
2175 	struct vmxnet3_rxdesc *rxd;
2176 	struct vmxnet3_rxbuf *rxb;
2177 	bus_dma_tag_t tag;
2178 	bus_dmamap_t dmap;
2179 	int idx, btype, error;
2180 
2181 	tag = sc->vmx_dmat;
2182 	dmap = rxr->vxrxr_spare_dmap;
2183 	idx = rxr->vxrxr_fill;
2184 	rxd = &rxr->vxrxr_rxd[idx];
2185 	rxb = &rxr->vxrxr_rxbuf[idx];
2186 
2187 	/* Don't allocate buffers for ring 2 for now. */
2188 	if (rxr->vxrxr_rid != 0)
2189 		return -1;
2190 	btype = VMXNET3_BTYPE_HEAD;
2191 
2192 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2193 	if (m == NULL)
2194 		return (ENOBUFS);
2195 
2196 	MCLGET(m, M_DONTWAIT);
2197 	if ((m->m_flags & M_EXT) == 0) {
2198 		rxq->vxrxq_mgetcl_failed.ev_count++;
2199 		m_freem(m);
2200 		return (ENOBUFS);
2201 	}
2202 
2203 	m->m_pkthdr.len = m->m_len = JUMBO_LEN;
2204 	m_adj(m, ETHER_ALIGN);
2205 
2206 	error = bus_dmamap_load_mbuf(sc->vmx_dmat, dmap, m, BUS_DMA_NOWAIT);
2207 	if (error) {
2208 		m_freem(m);
2209 		rxq->vxrxq_mbuf_load_failed.ev_count++;
2210 		return (error);
2211 	}
2212 
2213 	if (rxb->vrxb_m != NULL) {
2214 		bus_dmamap_sync(tag, rxb->vrxb_dmamap,
2215 		    0, rxb->vrxb_dmamap->dm_mapsize,
2216 		    BUS_DMASYNC_POSTREAD);
2217 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2218 	}
2219 
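	/*
	 * Swap DMA maps: the ring slot takes the spare map that now holds the
	 * freshly loaded mbuf, and the slot's old map becomes the spare for
	 * the next replenish.
	 */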
2220 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2221 	rxb->vrxb_dmamap = dmap;
2222 	rxb->vrxb_m = m;
2223 
2224 	rxd->addr = DMAADDR(dmap);
2225 	rxd->len = m->m_pkthdr.len;
2226 	rxd->btype = btype;
2227 	rxd->gen = rxr->vxrxr_gen;
2228 
2229 	vmxnet3_rxr_increment_fill(rxr);
2230 	return (0);
2231 }
2232 
2233 static void
2234 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2235     struct vmxnet3_rxring *rxr, int idx)
2236 {
2237 	struct vmxnet3_rxdesc *rxd;
2238 
2239 	rxd = &rxr->vxrxr_rxd[idx];
2240 	rxd->gen = rxr->vxrxr_gen;
2241 	vmxnet3_rxr_increment_fill(rxr);
2242 }
2243 
2244 static void
2245 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2246 {
2247 	struct vmxnet3_softc *sc;
2248 	struct vmxnet3_rxring *rxr;
2249 	struct vmxnet3_comp_ring *rxc;
2250 	struct vmxnet3_rxcompdesc *rxcd;
2251 	int idx, eof;
2252 
2253 	sc = rxq->vxrxq_sc;
2254 	rxc = &rxq->vxrxq_comp_ring;
2255 
2256 	do {
2257 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2258 		if (rxcd->gen != rxc->vxcr_gen)
2259 			break;		/* Not expected. */
2260 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2261 
2262 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2263 			rxc->vxcr_next = 0;
2264 			rxc->vxcr_gen ^= 1;
2265 		}
2266 
2267 		idx = rxcd->rxd_idx;
2268 		eof = rxcd->eop;
2269 		if (rxcd->qid < sc->vmx_nrxqueues)
2270 			rxr = &rxq->vxrxq_cmd_ring[0];
2271 		else
2272 			rxr = &rxq->vxrxq_cmd_ring[1];
2273 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2274 	} while (!eof);
2275 }
2276 
2277 static void
2278 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2279 {
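	/*
	 * Translate the checksum bits of the Rx completion descriptor into
	 * mbuf M_CSUM flags, marking a checksum as bad when the device
	 * reports that its verification failed.
	 */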
2280 	if (rxcd->no_csum)
2281 		return;
2282 
2283 	if (rxcd->ipv4) {
2284 		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
2285 		if (rxcd->ipcsum_ok == 0)
2286 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
2287 	}
2288 
2289 	if (rxcd->fragment)
2290 		return;
2291 
2292 	if (rxcd->tcp) {
2293 		m->m_pkthdr.csum_flags |=
2294 		    rxcd->ipv4 ? M_CSUM_TCPv4 : M_CSUM_TCPv6;
2295 		if ((rxcd->csum_ok) == 0)
2296 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2297 	}
2298 
2299 	if (rxcd->udp) {
2300 		m->m_pkthdr.csum_flags |=
2301 		    rxcd->ipv4 ? M_CSUM_UDPv4 : M_CSUM_UDPv6;
2302 		if ((rxcd->csum_ok) == 0)
2303 			m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
2304 	}
2305 }
2306 
2307 static void
2308 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2309     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2310 {
2311 	struct vmxnet3_softc *sc;
2312 	struct ifnet *ifp;
2313 
2314 	sc = rxq->vxrxq_sc;
2315 	ifp = &sc->vmx_ethercom.ec_if;
2316 
2317 	if (rxcd->error) {
2318 		if_statinc(ifp, if_ierrors);
2319 		m_freem(m);
2320 		return;
2321 	}
2322 
2323 	if (!rxcd->no_csum)
2324 		vmxnet3_rx_csum(rxcd, m);
2325 	if (rxcd->vlan)
2326 		vlan_set_tag(m, rxcd->vtag);
2327 
2328 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
2329 	if_statinc_ref(nsr, if_ipackets);
2330 	if_statadd_ref(nsr, if_ibytes, m->m_pkthdr.len);
2331 	IF_STAT_PUTREF(ifp);
2332 
2333 	if_percpuq_enqueue(ifp->if_percpuq, m);
2334 }
2335 
2336 static bool
2337 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
2338 {
2339 	struct vmxnet3_softc *sc;
2340 	struct ifnet *ifp;
2341 	struct vmxnet3_rxring *rxr;
2342 	struct vmxnet3_comp_ring *rxc;
2343 	struct vmxnet3_rxdesc *rxd __diagused;
2344 	struct vmxnet3_rxcompdesc *rxcd;
2345 	struct mbuf *m, *m_head, *m_tail;
2346 	u_int idx, length;
2347 	bool more = false;
2348 
2349 	sc = rxq->vxrxq_sc;
2350 	ifp = &sc->vmx_ethercom.ec_if;
2351 	rxc = &rxq->vxrxq_comp_ring;
2352 
2353 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2354 
2355 	if ((ifp->if_flags & IFF_RUNNING) == 0)
2356 		return more;
2357 
2358 	m_head = rxq->vxrxq_mhead;
2359 	rxq->vxrxq_mhead = NULL;
2360 	m_tail = rxq->vxrxq_mtail;
2361 	rxq->vxrxq_mtail = NULL;
2362 	KASSERT(m_head == NULL || m_tail != NULL);
2363 
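	/*
	 * Walk the Rx completion ring.  A frame may span several descriptors:
	 * the SOP completion starts an mbuf chain from command ring 0, body
	 * completions extend it, and the EOP completion hands the assembled
	 * chain to the stack.  A chain still being assembled when the
	 * generation check stops the loop is parked in vxrxq_mhead/mtail for
	 * the next call.
	 */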
2364 	for (;;) {
2365 		if (limit-- == 0) {
2366 			more = true;
2367 			break;
2368 		}
2369 
2370 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2371 		if (rxcd->gen != rxc->vxcr_gen) {
2372 			rxq->vxrxq_mhead = m_head;
2373 			rxq->vxrxq_mtail = m_tail;
2374 			break;
2375 		}
2376 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2377 
2378 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2379 			rxc->vxcr_next = 0;
2380 			rxc->vxcr_gen ^= 1;
2381 		}
2382 
2383 		idx = rxcd->rxd_idx;
2384 		length = rxcd->len;
2385 		if (rxcd->qid < sc->vmx_nrxqueues)
2386 			rxr = &rxq->vxrxq_cmd_ring[0];
2387 		else
2388 			rxr = &rxq->vxrxq_cmd_ring[1];
2389 		rxd = &rxr->vxrxr_rxd[idx];
2390 
2391 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2392 		KASSERT(m != NULL);
2393 
2394 		/*
2395 		 * The host may skip descriptors. We detect this when this
2396 		 * descriptor does not match the previous fill index. Catch
2397 		 * up with the host now.
2398 		 */
2399 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2400 			while (rxr->vxrxr_fill != idx) {
2401 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2402 				    rxr->vxrxr_gen;
2403 				vmxnet3_rxr_increment_fill(rxr);
2404 			}
2405 		}
2406 
2407 		if (rxcd->sop) {
2408 			/* start of frame w/o head buffer */
2409 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD);
2410 			/* start of frame not in ring 0 */
2411 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0]);
2412 			/* duplicate start of frame? */
2413 			KASSERT(m_head == NULL);
2414 
2415 			if (length == 0) {
2416 				/* Just ignore this descriptor. */
2417 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2418 				goto nextp;
2419 			}
2420 
2421 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2422 				if_statinc(ifp, if_iqdrops);
2423 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2424 				if (!rxcd->eop)
2425 					vmxnet3_rxq_discard_chain(rxq);
2426 				goto nextp;
2427 			}
2428 
2429 			m_set_rcvif(m, ifp);
2430 			m->m_pkthdr.len = m->m_len = length;
2431 			m->m_pkthdr.csum_flags = 0;
2432 			m_head = m_tail = m;
2433 
2434 		} else {
2435 			/* non start of frame w/o body buffer */
2436 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY);
2437 			/* frame not started? */
2438 			KASSERT(m_head != NULL);
2439 
2440 			if (vmxnet3_newbuf(sc, rxq, rxr) != 0) {
2441 				if_statinc(ifp, if_iqdrops);
2442 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2443 				if (!rxcd->eop)
2444 					vmxnet3_rxq_discard_chain(rxq);
2445 				m_freem(m_head);
2446 				m_head = m_tail = NULL;
2447 				goto nextp;
2448 			}
2449 
2450 			m->m_len = length;
2451 			m_head->m_pkthdr.len += length;
2452 			m_tail->m_next = m;
2453 			m_tail = m;
2454 		}
2455 
2456 		if (rxcd->eop) {
2457 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2458 			m_head = m_tail = NULL;
2459 
2460 			/* Must recheck after dropping the Rx lock. */
2461 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2462 				break;
2463 		}
2464 
2465 nextp:
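		/*
		 * When the device requests explicit Rx head updates, tell it
		 * how far this command ring has been consumed by writing the
		 * next index to the matching BAR0 head register.
		 */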
2466 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2467 			int qid = rxcd->qid;
2468 			bus_size_t r;
2469 
2470 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2471 			if (qid >= sc->vmx_nrxqueues) {
2472 				qid -= sc->vmx_nrxqueues;
2473 				r = VMXNET3_BAR0_RXH2(qid);
2474 			} else
2475 				r = VMXNET3_BAR0_RXH1(qid);
2476 			vmxnet3_write_bar0(sc, r, idx);
2477 		}
2478 	}
2479 
2480 	return more;
2481 }
2482 
2483 static inline void
2484 vmxnet3_sched_handle_queue(struct vmxnet3_softc *sc, struct vmxnet3_queue *vmxq)
2485 {
2486 
2487 	if (vmxq->vxq_workqueue) {
2488 		/*
2489 		 * When this function is called, "vmxq" is owned by a single
2490 		 * CPU, so an atomic operation is not required here.
2491 		 */
2492 		if (!vmxq->vxq_wq_enqueued) {
2493 			vmxq->vxq_wq_enqueued = true;
2494 			workqueue_enqueue(sc->vmx_queue_wq,
2495 			    &vmxq->vxq_wq_cookie, curcpu());
2496 		}
2497 	} else {
2498 		softint_schedule(vmxq->vxq_si);
2499 	}
2500 }
2501 
2502 static int
2503 vmxnet3_legacy_intr(void *xsc)
2504 {
2505 	struct vmxnet3_softc *sc;
2506 	struct vmxnet3_rxqueue *rxq;
2507 	struct vmxnet3_txqueue *txq;
2508 	u_int txlimit, rxlimit;
2509 	bool txmore, rxmore;
2510 
2511 	sc = xsc;
2512 	rxq = &sc->vmx_queue[0].vxq_rxqueue;
2513 	txq = &sc->vmx_queue[0].vxq_txqueue;
2514 	txlimit = sc->vmx_tx_intr_process_limit;
2515 	rxlimit = sc->vmx_rx_intr_process_limit;
2516 
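	/*
	 * A legacy (INTx) interrupt line may be shared, so bail out early
	 * when the device's BAR1 interrupt status reads zero, i.e. it has
	 * nothing pending for us.
	 */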
2517 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2518 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2519 			return (0);
2520 	}
2521 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2522 		vmxnet3_disable_all_intrs(sc);
2523 
2524 	if (sc->vmx_ds->event != 0)
2525 		vmxnet3_evintr(sc);
2526 
2527 	VMXNET3_RXQ_LOCK(rxq);
2528 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2529 	VMXNET3_RXQ_UNLOCK(rxq);
2530 
2531 	VMXNET3_TXQ_LOCK(txq);
2532 	txmore = vmxnet3_txq_eof(txq, txlimit);
2533 	VMXNET3_TXQ_UNLOCK(txq);
2534 
2535 	if (txmore || rxmore) {
2536 		vmxnet3_sched_handle_queue(sc, &sc->vmx_queue[0]);
2537 	} else {
2538 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2539 		vmxnet3_enable_all_intrs(sc);
2540 	}
2541 	return (1);
2542 }
2543 
2544 static int
2545 vmxnet3_txrxq_intr(void *xvmxq)
2546 {
2547 	struct vmxnet3_softc *sc;
2548 	struct vmxnet3_queue *vmxq;
2549 	struct vmxnet3_txqueue *txq;
2550 	struct vmxnet3_rxqueue *rxq;
2551 	u_int txlimit, rxlimit;
2552 	bool txmore, rxmore;
2553 
2554 	vmxq = xvmxq;
2555 	txq = &vmxq->vxq_txqueue;
2556 	rxq = &vmxq->vxq_rxqueue;
2557 	sc = txq->vxtxq_sc;
2558 	txlimit = sc->vmx_tx_intr_process_limit;
2559 	rxlimit = sc->vmx_rx_intr_process_limit;
2560 	vmxq->vxq_workqueue = sc->vmx_txrx_workqueue;
2561 
2562 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2563 		vmxnet3_disable_intr(sc, vmxq->vxq_intr_idx);
2564 
2565 	VMXNET3_TXQ_LOCK(txq);
2566 	txq->vxtxq_intr.ev_count++;
2567 	txmore = vmxnet3_txq_eof(txq, txlimit);
2568 	VMXNET3_TXQ_UNLOCK(txq);
2569 
2570 	VMXNET3_RXQ_LOCK(rxq);
2571 	rxq->vxrxq_intr.ev_count++;
2572 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2573 	VMXNET3_RXQ_UNLOCK(rxq);
2574 
2575 	if (txmore || rxmore) {
2576 		vmxnet3_sched_handle_queue(sc, vmxq);
2577 	} else {
2578 		/* for ALTQ */
2579 		if (vmxq->vxq_id == 0)
2580 			if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2581 		softint_schedule(txq->vxtxq_si);
2582 
2583 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2584 	}
2585 
2586 	return (1);
2587 }
2588 
2589 static void
2590 vmxnet3_handle_queue(void *xvmxq)
2591 {
2592 	struct vmxnet3_softc *sc;
2593 	struct vmxnet3_queue *vmxq;
2594 	struct vmxnet3_txqueue *txq;
2595 	struct vmxnet3_rxqueue *rxq;
2596 	u_int txlimit, rxlimit;
2597 	bool txmore, rxmore;
2598 
2599 	vmxq = xvmxq;
2600 	txq = &vmxq->vxq_txqueue;
2601 	rxq = &vmxq->vxq_rxqueue;
2602 	sc = txq->vxtxq_sc;
2603 	txlimit = sc->vmx_tx_process_limit;
2604 	rxlimit = sc->vmx_rx_process_limit;
2605 
2606 	VMXNET3_TXQ_LOCK(txq);
2607 	txq->vxtxq_defer.ev_count++;
2608 	txmore = vmxnet3_txq_eof(txq, txlimit);
2609 	if (txmore)
2610 		txq->vxtxq_deferreq.ev_count++;
2611 	/* for ALTQ */
2612 	if (vmxq->vxq_id == 0)
2613 		if_schedule_deferred_start(&sc->vmx_ethercom.ec_if);
2614 	softint_schedule(txq->vxtxq_si);
2615 	VMXNET3_TXQ_UNLOCK(txq);
2616 
2617 	VMXNET3_RXQ_LOCK(rxq);
2618 	rxq->vxrxq_defer.ev_count++;
2619 	rxmore = vmxnet3_rxq_eof(rxq, rxlimit);
2620 	if (rxmore)
2621 		rxq->vxrxq_deferreq.ev_count++;
2622 	VMXNET3_RXQ_UNLOCK(rxq);
2623 
2624 	if (txmore || rxmore)
2625 		vmxnet3_sched_handle_queue(sc, vmxq);
2626 	else
2627 		vmxnet3_enable_intr(sc, vmxq->vxq_intr_idx);
2628 }
2629 
2630 static void
2631 vmxnet3_handle_queue_work(struct work *wk, void *context)
2632 {
2633 	struct vmxnet3_queue *vmxq;
2634 
2635 	vmxq = container_of(wk, struct vmxnet3_queue, vxq_wq_cookie);
2636 	vmxq->vxq_wq_enqueued = false;
2637 	vmxnet3_handle_queue(vmxq);
2638 }
2639 
2640 static int
2641 vmxnet3_event_intr(void *xsc)
2642 {
2643 	struct vmxnet3_softc *sc;
2644 
2645 	sc = xsc;
2646 
2647 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2648 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2649 
2650 	sc->vmx_event_intr.ev_count++;
2651 
2652 	if (sc->vmx_ds->event != 0)
2653 		vmxnet3_evintr(sc);
2654 
2655 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2656 
2657 	return (1);
2658 }
2659 
2660 static void
2661 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2662 {
2663 	struct vmxnet3_txring *txr;
2664 	struct vmxnet3_txbuf *txb;
2665 	u_int i;
2666 
2667 	txr = &txq->vxtxq_cmd_ring;
2668 
2669 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2670 		txb = &txr->vxtxr_txbuf[i];
2671 
2672 		if (txb->vtxb_m == NULL)
2673 			continue;
2674 
2675 		bus_dmamap_sync(sc->vmx_dmat, txb->vtxb_dmamap,
2676 		    0, txb->vtxb_dmamap->dm_mapsize,
2677 		    BUS_DMASYNC_POSTWRITE);
2678 		bus_dmamap_unload(sc->vmx_dmat, txb->vtxb_dmamap);
2679 		m_freem(txb->vtxb_m);
2680 		txb->vtxb_m = NULL;
2681 	}
2682 }
2683 
2684 static void
2685 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2686 {
2687 	struct vmxnet3_rxring *rxr;
2688 	struct vmxnet3_rxbuf *rxb;
2689 	u_int i, j;
2690 
2691 	if (rxq->vxrxq_mhead != NULL) {
2692 		m_freem(rxq->vxrxq_mhead);
2693 		rxq->vxrxq_mhead = NULL;
2694 		rxq->vxrxq_mtail = NULL;
2695 	}
2696 
2697 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2698 		rxr = &rxq->vxrxq_cmd_ring[i];
2699 
2700 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2701 			rxb = &rxr->vxrxr_rxbuf[j];
2702 
2703 			if (rxb->vrxb_m == NULL)
2704 				continue;
2705 
2706 			bus_dmamap_sync(sc->vmx_dmat, rxb->vrxb_dmamap,
2707 			    0, rxb->vrxb_dmamap->dm_mapsize,
2708 			    BUS_DMASYNC_POSTREAD);
2709 			bus_dmamap_unload(sc->vmx_dmat, rxb->vrxb_dmamap);
2710 			m_freem(rxb->vrxb_m);
2711 			rxb->vrxb_m = NULL;
2712 		}
2713 	}
2714 }
2715 
2716 static void
2717 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2718 {
2719 	struct vmxnet3_rxqueue *rxq;
2720 	struct vmxnet3_txqueue *txq;
2721 	int i;
2722 
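	/*
	 * Briefly take and release every queue lock so that any interrupt or
	 * deferred handler still running under a queue lock has drained
	 * before the rings are torn down.
	 */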
2723 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2724 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
2725 		VMXNET3_RXQ_LOCK(rxq);
2726 		VMXNET3_RXQ_UNLOCK(rxq);
2727 	}
2728 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2729 		txq = &sc->vmx_queue[i].vxq_txqueue;
2730 		VMXNET3_TXQ_LOCK(txq);
2731 		VMXNET3_TXQ_UNLOCK(txq);
2732 	}
2733 }
2734 
2735 static void
2736 vmxnet3_stop_locked(struct vmxnet3_softc *sc)
2737 {
2738 	struct ifnet *ifp;
2739 	int q;
2740 
2741 	ifp = &sc->vmx_ethercom.ec_if;
2742 	VMXNET3_CORE_LOCK_ASSERT(sc);
2743 
2744 	ifp->if_flags &= ~IFF_RUNNING;
2745 	sc->vmx_link_active = 0;
2746 	callout_stop(&sc->vmx_tick);
2747 
2748 	/* Disable interrupts. */
2749 	vmxnet3_disable_all_intrs(sc);
2750 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2751 
2752 	vmxnet3_stop_rendezvous(sc);
2753 
2754 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2755 		vmxnet3_txstop(sc, &sc->vmx_queue[q].vxq_txqueue);
2756 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2757 		vmxnet3_rxstop(sc, &sc->vmx_queue[q].vxq_rxqueue);
2758 
2759 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2760 }
2761 
2762 static void
2763 vmxnet3_stop(struct ifnet *ifp, int disable)
2764 {
2765 	struct vmxnet3_softc *sc = ifp->if_softc;
2766 
2767 	VMXNET3_CORE_LOCK(sc);
2768 	vmxnet3_stop_locked(sc);
2769 	VMXNET3_CORE_UNLOCK(sc);
2770 }
2771 
2772 static void
2773 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2774 {
2775 	struct vmxnet3_txring *txr;
2776 	struct vmxnet3_comp_ring *txc;
2777 
2778 	txr = &txq->vxtxq_cmd_ring;
2779 	txr->vxtxr_head = 0;
2780 	txr->vxtxr_next = 0;
2781 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2782 	memset(txr->vxtxr_txd, 0,
2783 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2784 
2785 	txc = &txq->vxtxq_comp_ring;
2786 	txc->vxcr_next = 0;
2787 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2788 	memset(txc->vxcr_u.txcd, 0,
2789 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2790 }
2791 
2792 static int
2793 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2794 {
2795 	struct vmxnet3_rxring *rxr;
2796 	struct vmxnet3_comp_ring *rxc;
2797 	u_int i, populate, idx;
2798 	int error;
2799 
2800 	/* LRO and jumbo frames are not supported yet */
2801 	populate = 1;
2802 
2803 	for (i = 0; i < populate; i++) {
2804 		rxr = &rxq->vxrxq_cmd_ring[i];
2805 		rxr->vxrxr_fill = 0;
2806 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2807 		memset(rxr->vxrxr_rxd, 0,
2808 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2809 
2810 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2811 			error = vmxnet3_newbuf(sc, rxq, rxr);
2812 			if (error)
2813 				return (error);
2814 		}
2815 	}
2816 
2817 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2818 		rxr = &rxq->vxrxq_cmd_ring[i];
2819 		rxr->vxrxr_fill = 0;
2820 		rxr->vxrxr_gen = 0;
2821 		memset(rxr->vxrxr_rxd, 0,
2822 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2823 	}
2824 
2825 	rxc = &rxq->vxrxq_comp_ring;
2826 	rxc->vxcr_next = 0;
2827 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2828 	memset(rxc->vxcr_u.rxcd, 0,
2829 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2830 
2831 	return (0);
2832 }
2833 
2834 static int
2835 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2836 {
2837 	device_t dev;
2838 	int q, error;
2839 	dev = sc->vmx_dev;
2840 
2841 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2842 		vmxnet3_txinit(sc, &sc->vmx_queue[q].vxq_txqueue);
2843 
2844 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2845 		error = vmxnet3_rxinit(sc, &sc->vmx_queue[q].vxq_rxqueue);
2846 		if (error) {
2847 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2848 			return (error);
2849 		}
2850 	}
2851 
2852 	return (0);
2853 }
2854 
2855 static int
2856 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2857 {
2858 	int q;
2859 
2860 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2861 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2862 		return (1);
2863 	}
2864 
2865 	/* Reset the Rx queue heads. */
2866 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2867 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2868 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2869 	}
2870 
2871 	return (0);
2872 }
2873 
2874 static void
2875 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2876 {
2877 
2878 	vmxnet3_set_rxfilter(sc);
2879 
2880 	memset(sc->vmx_ds->vlan_filter, 0, sizeof(sc->vmx_ds->vlan_filter));
2881 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2882 }
2883 
2884 static int
2885 vmxnet3_reinit(struct vmxnet3_softc *sc)
2886 {
2887 
2888 	vmxnet3_set_lladdr(sc);
2889 	vmxnet3_reinit_shared_data(sc);
2890 
2891 	if (vmxnet3_reinit_queues(sc) != 0)
2892 		return (ENXIO);
2893 
2894 	if (vmxnet3_enable_device(sc) != 0)
2895 		return (ENXIO);
2896 
2897 	vmxnet3_reinit_rxfilters(sc);
2898 
2899 	return (0);
2900 }
2901 
2902 static int
2903 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2904 {
2905 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
2906 	int error;
2907 
2908 	vmxnet3_stop_locked(sc);
2909 
2910 	error = vmxnet3_reinit(sc);
2911 	if (error) {
2912 		vmxnet3_stop_locked(sc);
2913 		return (error);
2914 	}
2915 
2916 	ifp->if_flags |= IFF_RUNNING;
2917 	vmxnet3_if_link_status(sc);
2918 
2919 	vmxnet3_enable_all_intrs(sc);
2920 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2921 
2922 	return (0);
2923 }
2924 
2925 static int
2926 vmxnet3_init(struct ifnet *ifp)
2927 {
2928 	struct vmxnet3_softc *sc = ifp->if_softc;
2929 	int error;
2930 
2931 	VMXNET3_CORE_LOCK(sc);
2932 	error = vmxnet3_init_locked(sc);
2933 	VMXNET3_CORE_UNLOCK(sc);
2934 
2935 	return (error);
2936 }
2937 
2938 static int
2939 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2940     int *start, int *csum_start)
2941 {
2942 	struct ether_header *eh;
2943 	struct mbuf *mp;
2944 	int offset, csum_off, iphl, offp;
2945 	bool v4;
2946 
2947 	eh = mtod(m, struct ether_header *);
2948 	switch (htons(eh->ether_type)) {
2949 	case ETHERTYPE_IP:
2950 	case ETHERTYPE_IPV6:
2951 		offset = ETHER_HDR_LEN;
2952 		break;
2953 	case ETHERTYPE_VLAN:
2954 		offset = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2955 		break;
2956 	default:
2957 		m_freem(m);
2958 		return (EINVAL);
2959 	}
2960 
2961 	if ((m->m_pkthdr.csum_flags &
2962 	    (M_CSUM_TSOv4 | M_CSUM_UDPv4 | M_CSUM_TCPv4)) != 0) {
2963 		iphl = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
2964 		v4 = true;
2965 	} else {
2966 		iphl = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
2967 		v4 = false;
2968 	}
2969 	*start = offset + iphl;
2970 
2971 	if (m->m_pkthdr.csum_flags &
2972 	    (M_CSUM_TCPv4 | M_CSUM_TCPv6 | M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2973 		csum_off = offsetof(struct tcphdr, th_sum);
2974 	} else {
2975 		csum_off = offsetof(struct udphdr, uh_sum);
2976 	}
2977 
2978 	*csum_start = *start + csum_off;
2979 	mp = m_pulldown(m, 0, *csum_start + 2, &offp);
2980 	if (!mp) {
2981 		/* m is already freed */
2982 		return ENOBUFS;
2983 	}
2984 
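	/*
	 * For TSO, seed the TCP checksum field with the pseudo-header
	 * checksum (addresses and protocol only, no length) before handing
	 * the packet to the device.
	 */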
2985 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
2986 		struct tcphdr *tcp;
2987 
2988 		txq->vxtxq_stats.vmtxs_tso++;
2989 		tcp = (void *)(mtod(mp, char *) + offp + *start);
2990 
2991 		if (v4) {
2992 			struct ip *ip;
2993 
2994 			ip = (void *)(mtod(mp, char *) + offp + offset);
2995 			tcp->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
2996 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2997 		} else {
2998 			struct ip6_hdr *ip6;
2999 
3000 			ip6 = (void *)(mtod(mp, char *) + offp + offset);
3001 			tcp->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3002 			    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3003 		}
3004 
3005 		/*
3006 		 * For TSO, the size of the protocol header is also
3007 		 * included in the descriptor header size.
3008 		 */
3009 		*start += (tcp->th_off << 2);
3010 	} else
3011 		txq->vxtxq_stats.vmtxs_csum++;
3012 
3013 	return (0);
3014 }
3015 
3016 static int
3017 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
3018     bus_dmamap_t dmap)
3019 {
3020 	struct mbuf *m;
3021 	bus_dma_tag_t tag;
3022 	int error;
3023 
3024 	m = *m0;
3025 	tag = txq->vxtxq_sc->vmx_dmat;
3026 
3027 	error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3028 	if (error == 0 || error != EFBIG)
3029 		return (error);
3030 
3031 	m = m_defrag(m, M_NOWAIT);
3032 	if (m != NULL) {
3033 		*m0 = m;
3034 		error = bus_dmamap_load_mbuf(tag, dmap, m, BUS_DMA_NOWAIT);
3035 	} else
3036 		error = ENOBUFS;
3037 
3038 	if (error) {
3039 		m_freem(*m0);
3040 		*m0 = NULL;
3041 		txq->vxtxq_defrag_failed.ev_count++;
3042 	} else
3043 		txq->vxtxq_defragged.ev_count++;
3044 
3045 	return (error);
3046 }
3047 
3048 static void
3049 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
3050 {
3051 
3052 	bus_dmamap_unload(txq->vxtxq_sc->vmx_dmat, dmap);
3053 }
3054 
3055 static int
3056 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
3057 {
3058 	struct vmxnet3_softc *sc;
3059 	struct vmxnet3_txring *txr;
3060 	struct vmxnet3_txdesc *txd, *sop;
3061 	struct mbuf *m;
3062 	bus_dmamap_t dmap;
3063 	bus_dma_segment_t *segs;
3064 	int i, gen, start, csum_start, nsegs, error;
3065 
3066 	sc = txq->vxtxq_sc;
3067 	start = 0;
3068 	txd = NULL;
3069 	txr = &txq->vxtxq_cmd_ring;
3070 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
3071 	csum_start = 0; /* GCC */
3072 
3073 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap);
3074 	if (error)
3075 		return (error);
3076 
3077 	nsegs = dmap->dm_nsegs;
3078 	segs = dmap->dm_segs;
3079 
3080 	m = *m0;
3081 	KASSERT(m->m_flags & M_PKTHDR);
3082 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS);
3083 
3084 	if (vmxnet3_txring_avail(txr) < nsegs) {
3085 		txq->vxtxq_stats.vmtxs_full++;
3086 		vmxnet3_txq_unload_mbuf(txq, dmap);
3087 		return (ENOSPC);
3088 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
3089 		error = vmxnet3_txq_offload_ctx(txq, m, &start, &csum_start);
3090 		if (error) {
3091 			/* m is already freed */
3092 			txq->vxtxq_stats.vmtxs_offload_failed++;
3093 			vmxnet3_txq_unload_mbuf(txq, dmap);
3094 			*m0 = NULL;
3095 			return (error);
3096 		}
3097 	}
3098 
3099 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
3100 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
3101 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now */
3102 
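	/*
	 * Fill the descriptors using the inverted generation bit for the SOP
	 * descriptor, so the device does not see the packet until the final
	 * ownership flip below; the remaining descriptors use the ring's
	 * current generation.
	 */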
3103 	for (i = 0; i < nsegs; i++) {
3104 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
3105 
3106 		txd->addr = segs[i].ds_addr;
3107 		txd->len = segs[i].ds_len;
3108 		txd->gen = gen;
3109 		txd->dtype = 0;
3110 		txd->offload_mode = VMXNET3_OM_NONE;
3111 		txd->offload_pos = 0;
3112 		txd->hlen = 0;
3113 		txd->eop = 0;
3114 		txd->compreq = 0;
3115 		txd->vtag_mode = 0;
3116 		txd->vtag = 0;
3117 
3118 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
3119 			txr->vxtxr_head = 0;
3120 			txr->vxtxr_gen ^= 1;
3121 		}
3122 		gen = txr->vxtxr_gen;
3123 	}
3124 	txd->eop = 1;
3125 	txd->compreq = 1;
3126 
3127 	if (vlan_has_tag(m)) {
3128 		sop->vtag_mode = 1;
3129 		sop->vtag = vlan_get_tag(m);
3130 	}
3131 
3132 	if (m->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
3133 		sop->offload_mode = VMXNET3_OM_TSO;
3134 		sop->hlen = start;
3135 		sop->offload_pos = m->m_pkthdr.segsz;
3136 	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
3137 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
3138 		sop->offload_mode = VMXNET3_OM_CSUM;
3139 		sop->hlen = start;
3140 		sop->offload_pos = csum_start;
3141 	}
3142 
3143 	/* Finally, change the ownership. */
3144 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
3145 	sop->gen ^= 1;
3146 
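	/*
	 * Coalesce doorbell writes: the Tx head register is written only once
	 * the number of pending descriptors reaches the intr_threshold kept
	 * in the shared Tx queue state.
	 */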
3147 	txq->vxtxq_ts->npending += nsegs;
3148 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
3149 		struct vmxnet3_queue *vmxq;
3150 		vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3151 		txq->vxtxq_ts->npending = 0;
3152 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(vmxq->vxq_id),
3153 		    txr->vxtxr_head);
3154 	}
3155 
3156 	return (0);
3157 }
3158 
3159 #define VMXNET3_TX_START 1
3160 #define VMXNET3_TX_TRANSMIT 2
3161 static inline void
3162 vmxnet3_tx_common_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq, int txtype)
3163 {
3164 	struct vmxnet3_softc *sc;
3165 	struct vmxnet3_txring *txr;
3166 	struct mbuf *m_head;
3167 	int tx;
3168 
3169 	sc = ifp->if_softc;
3170 	txr = &txq->vxtxq_cmd_ring;
3171 	tx = 0;
3172 
3173 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3174 
3175 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3176 	    sc->vmx_link_active == 0)
3177 		return;
3178 
3179 	for (;;) {
3180 		if (txtype == VMXNET3_TX_START)
3181 			IFQ_POLL(&ifp->if_snd, m_head);
3182 		else
3183 			m_head = pcq_peek(txq->vxtxq_interq);
3184 		if (m_head == NULL)
3185 			break;
3186 
3187 		if (vmxnet3_txring_avail(txr) < VMXNET3_TX_MAXSEGS)
3188 			break;
3189 
3190 		if (txtype == VMXNET3_TX_START)
3191 			IFQ_DEQUEUE(&ifp->if_snd, m_head);
3192 		else
3193 			m_head = pcq_get(txq->vxtxq_interq);
3194 		if (m_head == NULL)
3195 			break;
3196 
3197 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3198 			if (m_head != NULL)
3199 				m_freem(m_head);
3200 			break;
3201 		}
3202 
3203 		tx++;
3204 		bpf_mtap(ifp, m_head, BPF_D_OUT);
3205 	}
3206 
3207 	if (tx > 0)
3208 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3209 }
3210 
3211 static void
3212 vmxnet3_start_locked(struct ifnet *ifp)
3213 {
3214 	struct vmxnet3_softc *sc;
3215 	struct vmxnet3_txqueue *txq;
3216 
3217 	sc = ifp->if_softc;
3218 	txq = &sc->vmx_queue[0].vxq_txqueue;
3219 
3220 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_START);
3221 }
3222 
3223 void
3224 vmxnet3_start(struct ifnet *ifp)
3225 {
3226 	struct vmxnet3_softc *sc;
3227 	struct vmxnet3_txqueue *txq;
3228 
3229 	sc = ifp->if_softc;
3230 	txq = &sc->vmx_queue[0].vxq_txqueue;
3231 
3232 	VMXNET3_TXQ_LOCK(txq);
3233 	vmxnet3_start_locked(ifp);
3234 	VMXNET3_TXQ_UNLOCK(txq);
3235 }
3236 
3237 static int
3238 vmxnet3_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
3239 {
3240 	struct vmxnet3_softc *sc;
3241 	u_int cpuid;
3242 
3243 	sc = ifp->if_softc;
3244 	cpuid = cpu_index(curcpu());
3245 	/*
3246 	 * Future work:
3247 	 * We should select the txqueue so as to even out the load, even when
3248 	 * ncpu differs from sc->vmx_ntxqueues. Currently the load is uneven;
3249 	 * for example, when ncpu is six and ntxqueues is four, the load on
3250 	 * vmx_queue[0] and vmx_queue[1] is higher than on vmx_queue[2] and
3251 	 * vmx_queue[3], because CPU#4 always uses vmx_queue[0] and CPU#5
3252 	 * always uses vmx_queue[1].
3253 	 * Furthermore, we should not use a random value to select the txqueue,
3254 	 * to avoid reordering; we should use the flow information of the mbuf.
3255 	 */
3256 	return cpuid % sc->vmx_ntxqueues;
3257 }
3258 
3259 static void
3260 vmxnet3_transmit_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq)
3261 {
3262 
3263 	vmxnet3_tx_common_locked(ifp, txq, VMXNET3_TX_TRANSMIT);
3264 }
3265 
3266 static int
3267 vmxnet3_transmit(struct ifnet *ifp, struct mbuf *m)
3268 {
3269 	struct vmxnet3_softc *sc;
3270 	struct vmxnet3_txqueue *txq;
3271 	int qid;
3272 
3273 	qid = vmxnet3_select_txqueue(ifp, m);
3274 	sc = ifp->if_softc;
3275 	txq = &sc->vmx_queue[qid].vxq_txqueue;
3276 
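	/*
	 * Enqueue the packet on the per-queue pcq; if the Tx queue lock is
	 * free, drain it immediately, otherwise schedule the softint so
	 * if_transmit never blocks on a contended lock.
	 */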
3277 	if (__predict_false(!pcq_put(txq->vxtxq_interq, m))) {
3278 		VMXNET3_TXQ_LOCK(txq);
3279 		txq->vxtxq_pcqdrop.ev_count++;
3280 		VMXNET3_TXQ_UNLOCK(txq);
3281 		m_freem(m);
3282 		return ENOBUFS;
3283 	}
3284 
3285 	if (VMXNET3_TXQ_TRYLOCK(txq)) {
3286 		vmxnet3_transmit_locked(ifp, txq);
3287 		VMXNET3_TXQ_UNLOCK(txq);
3288 	} else {
3289 		kpreempt_disable();
3290 		softint_schedule(txq->vxtxq_si);
3291 		kpreempt_enable();
3292 	}
3293 
3294 	return 0;
3295 }
3296 
3297 static void
3298 vmxnet3_deferred_transmit(void *arg)
3299 {
3300 	struct vmxnet3_txqueue *txq = arg;
3301 	struct vmxnet3_softc *sc = txq->vxtxq_sc;
3302 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3303 
3304 	VMXNET3_TXQ_LOCK(txq);
3305 	txq->vxtxq_transmitdef.ev_count++;
3306 	if (pcq_peek(txq->vxtxq_interq) != NULL)
3307 		vmxnet3_transmit_locked(ifp, txq);
3308 	VMXNET3_TXQ_UNLOCK(txq);
3309 }
3310 
3311 static void
3312 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3313 {
3314 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3315 	struct ethercom *ec = &sc->vmx_ethercom;
3316 	struct vmxnet3_driver_shared *ds = sc->vmx_ds;
3317 	struct ether_multi *enm;
3318 	struct ether_multistep step;
3319 	u_int mode;
3320 	uint8_t *p;
3321 
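	/*
	 * Rebuild the multicast filter table from the ethercom multicast list
	 * and program the receive mode, falling back to ALLMULTI when the
	 * interface is promiscuous, an address range is requested, or the
	 * list exceeds VMXNET3_MULTICAST_MAX entries.
	 */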
3322 	ds->mcast_tablelen = 0;
3323 	ETHER_LOCK(ec);
3324 	CLR(ec->ec_flags, ETHER_F_ALLMULTI);
3325 	ETHER_UNLOCK(ec);
3326 
3327 	/*
3328 	 * Always accept broadcast frames.
3329 	 * Always accept frames destined to our station address.
3330 	 */
3331 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
3332 
3333 	ETHER_LOCK(ec);
3334 	if (ISSET(ifp->if_flags, IFF_PROMISC) ||
3335 	    ec->ec_multicnt > VMXNET3_MULTICAST_MAX)
3336 		goto allmulti;
3337 
3338 	p = sc->vmx_mcast;
3339 	ETHER_FIRST_MULTI(step, ec, enm);
3340 	while (enm != NULL) {
3341 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
3342 			/*
3343 			 * We must listen to a range of multicast addresses.
3344 			 * For now, just accept all multicasts, rather than
3345 			 * trying to set only those filter bits needed to match
3346 			 * the range.  (At this time, the only use of address
3347 			 * ranges is for IP multicast routing, for which the
3348 			 * range is big enough to require all bits set.)
3349 			 */
3350 			goto allmulti;
3351 		}
3352 		memcpy(p, enm->enm_addrlo, ETHER_ADDR_LEN);
3353 
3354 		p += ETHER_ADDR_LEN;
3355 
3356 		ETHER_NEXT_MULTI(step, enm);
3357 	}
3358 
3359 	if (ec->ec_multicnt > 0) {
3360 		SET(mode, VMXNET3_RXMODE_MCAST);
3361 		ds->mcast_tablelen = p - sc->vmx_mcast;
3362 	}
3363 	ETHER_UNLOCK(ec);
3364 
3365 	goto setit;
3366 
3367 allmulti:
3368 	SET(ec->ec_flags, ETHER_F_ALLMULTI);
3369 	ETHER_UNLOCK(ec);
3370 	SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
3371 	if (ifp->if_flags & IFF_PROMISC)
3372 		SET(mode, VMXNET3_RXMODE_PROMISC);
3373 
3374 setit:
3375 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3376 	ds->rxmode = mode;
3377 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3378 }
3379 
3380 static int
3381 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
3382 {
3383 	struct vmxnet3_softc *sc = ifp->if_softc;
3384 	struct ifreq *ifr = (struct ifreq *)data;
3385 	int s, error = 0;
3386 
3387 	switch (cmd) {
3388 	case SIOCSIFMTU: {
3389 		int nmtu = ifr->ifr_mtu;
3390 
3391 		if (nmtu < VMXNET3_MIN_MTU || nmtu > VMXNET3_MAX_MTU) {
3392 			error = EINVAL;
3393 			break;
3394 		}
3395 		if (ifp->if_mtu != (uint64_t)nmtu) {
3396 			s = splnet();
3397 			error = ether_ioctl(ifp, cmd, data);
3398 			splx(s);
3399 			if (error == ENETRESET)
3400 				error = vmxnet3_init(ifp);
3401 		}
3402 		break;
3403 	}
3404 
3405 	default:
3406 		s = splnet();
3407 		error = ether_ioctl(ifp, cmd, data);
3408 		splx(s);
3409 	}
3410 
3411 	if (error == ENETRESET) {
3412 		VMXNET3_CORE_LOCK(sc);
3413 		if (ifp->if_flags & IFF_RUNNING)
3414 			vmxnet3_set_rxfilter(sc);
3415 		VMXNET3_CORE_UNLOCK(sc);
3416 		error = 0;
3417 	}
3418 
3419 	return error;
3420 }
3421 
3422 static int
3423 vmxnet3_ifflags_cb(struct ethercom *ec)
3424 {
3425 	struct vmxnet3_softc *sc;
3426 
3427 	sc = ec->ec_if.if_softc;
3428 
3429 	VMXNET3_CORE_LOCK(sc);
3430 	vmxnet3_set_rxfilter(sc);
3431 	VMXNET3_CORE_UNLOCK(sc);
3432 
3433 	vmxnet3_if_link_status(sc);
3434 
3435 	return 0;
3436 }
3437 
3438 static int
3439 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3440 {
3441 	struct vmxnet3_softc *sc;
3442 	struct vmxnet3_queue *vmxq;
3443 
3444 	sc = txq->vxtxq_sc;
3445 	vmxq = container_of(txq, struct vmxnet3_queue, vxq_txqueue);
3446 
3447 	VMXNET3_TXQ_LOCK(txq);
3448 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3449 		VMXNET3_TXQ_UNLOCK(txq);
3450 		return (0);
3451 	}
3452 	txq->vxtxq_watchdogto.ev_count++;
3453 	VMXNET3_TXQ_UNLOCK(txq);
3454 
3455 	device_printf(sc->vmx_dev, "watchdog timeout on queue %d\n",
3456 	    vmxq->vxq_id);
3457 	return (1);
3458 }
3459 
3460 static void
3461 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3462 {
3463 
3464 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3465 }
3466 
3467 static void
3468 vmxnet3_tick(void *xsc)
3469 {
3470 	struct vmxnet3_softc *sc;
3471 	int i, timedout;
3472 
3473 	sc = xsc;
3474 	timedout = 0;
3475 
3476 	VMXNET3_CORE_LOCK(sc);
3477 
3478 	vmxnet3_refresh_host_stats(sc);
3479 
3480 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3481 		timedout |= vmxnet3_watchdog(&sc->vmx_queue[i].vxq_txqueue);
3482 
3483 	if (timedout != 0)
3484 		vmxnet3_init_locked(sc);
3485 	else
3486 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3487 
3488 	VMXNET3_CORE_UNLOCK(sc);
3489 }
3490 
3491 /*
3492  * Update the link state of the ifnet and the softc.
3493  */
3494 static void
3495 vmxnet3_if_link_status(struct vmxnet3_softc *sc)
3496 {
3497 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
3498 	u_int link;
3499 	bool up;
3500 
3501 	up = vmxnet3_cmd_link_status(ifp);
3502 	if (up) {
3503 		sc->vmx_link_active = 1;
3504 		link = LINK_STATE_UP;
3505 	} else {
3506 		sc->vmx_link_active = 0;
3507 		link = LINK_STATE_DOWN;
3508 	}
3509 
3510 	if_link_state_change(ifp, link);
3511 }
3512 
3513 /*
3514  * Check the vmx(4) link state via VMXNET3_CMD and update ifp->if_baudrate.
3515  *   Returns
3516  *       - true:  link up
3517  *       - false: link down
3518  */
3519 static bool
3520 vmxnet3_cmd_link_status(struct ifnet *ifp)
3521 {
3522 	struct vmxnet3_softc *sc = ifp->if_softc;
3523 	u_int x, speed;
3524 
3525 	x = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3526 	if ((x & 1) == 0)
3527 		return false;
3528 
3529 	speed = x >> 16;
3530 	ifp->if_baudrate = IF_Mbps(speed);
3531 	return true;
3532 }
3533 
3534 static void
3535 vmxnet3_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3536 {
3537 	bool up;
3538 
3539 	ifmr->ifm_status = IFM_AVALID;
3540 	ifmr->ifm_active = IFM_ETHER;
3541 
3542 	up = vmxnet3_cmd_link_status(ifp);
3543 	if (!up)
3544 		return;
3545 
3546 	ifmr->ifm_status |= IFM_ACTIVE;
3547 
3548 	if (ifp->if_baudrate >= IF_Gbps(10ULL))
3549 		ifmr->ifm_active |= IFM_10G_T;
3550 }
3551 
3552 static int
3553 vmxnet3_ifmedia_change(struct ifnet *ifp)
3554 {
3555 	return 0;
3556 }
3557 
3558 static void
3559 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3560 {
3561 	uint32_t ml, mh;
3562 
3563 	ml  = sc->vmx_lladdr[0];
3564 	ml |= sc->vmx_lladdr[1] << 8;
3565 	ml |= sc->vmx_lladdr[2] << 16;
3566 	ml |= sc->vmx_lladdr[3] << 24;
3567 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3568 
3569 	mh  = sc->vmx_lladdr[4];
3570 	mh |= sc->vmx_lladdr[5] << 8;
3571 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3572 }
3573 
3574 static void
3575 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3576 {
3577 	uint32_t ml, mh;
3578 
3579 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3580 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3581 
3582 	sc->vmx_lladdr[0] = ml;
3583 	sc->vmx_lladdr[1] = ml >> 8;
3584 	sc->vmx_lladdr[2] = ml >> 16;
3585 	sc->vmx_lladdr[3] = ml >> 24;
3586 	sc->vmx_lladdr[4] = mh;
3587 	sc->vmx_lladdr[5] = mh >> 8;
3588 }
3589 
3590 static void
3591 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3592 {
3593 	int i;
3594 
3595 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3596 	for (i = 0; i < sc->vmx_nintrs; i++)
3597 		vmxnet3_enable_intr(sc, i);
3598 }
3599 
3600 static void
3601 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3602 {
3603 	int i;
3604 
3605 	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3606 	for (i = 0; i < sc->vmx_nintrs; i++)
3607 		vmxnet3_disable_intr(sc, i);
3608 }
3609 
3610 static int
3611 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3612     struct vmxnet3_dma_alloc *dma)
3613 {
3614 	bus_dma_tag_t t = sc->vmx_dmat;
3615 	bus_dma_segment_t *segs = dma->dma_segs;
3616 	int n, error;
3617 
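	/*
	 * Single-segment bus_dma sequence: allocate one segment, map it into
	 * KVA, create a DMA map and load it, so both the kernel virtual and
	 * the physical address of the area are available to the caller.
	 */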
3618 	memset(dma, 0, sizeof(*dma));
3619 
3620 	error = bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT);
3621 	if (error) {
3622 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_alloc failed: %d\n", error);
3623 		goto fail1;
3624 	}
3625 	KASSERT(n == 1);
3626 
3627 	error = bus_dmamem_map(t, segs, 1, size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
3628 	if (error) {
3629 		aprint_error_dev(sc->vmx_dev, "bus_dmamem_map failed: %d\n", error);
3630 		goto fail2;
3631 	}
3632 
3633 	error = bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
3634 	if (error) {
3635 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_create failed: %d\n", error);
3636 		goto fail3;
3637 	}
3638 
3639 	error = bus_dmamap_load(t, dma->dma_map, dma->dma_vaddr, size, NULL,
3640 	    BUS_DMA_NOWAIT);
3641 	if (error) {
3642 		aprint_error_dev(sc->vmx_dev, "bus_dmamap_load failed: %d\n", error);
3643 		goto fail4;
3644 	}
3645 
3646 	memset(dma->dma_vaddr, 0, size);
3647 	dma->dma_paddr = DMAADDR(dma->dma_map);
3648 	dma->dma_size = size;
3649 
3650 	return (0);
3651 fail4:
3652 	bus_dmamap_destroy(t, dma->dma_map);
3653 fail3:
3654 	bus_dmamem_unmap(t, dma->dma_vaddr, size);
3655 fail2:
3656 	bus_dmamem_free(t, segs, 1);
3657 fail1:
3658 	return (error);
3659 }
3660 
3661 static void
3662 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3663 {
3664 	bus_dma_tag_t t = sc->vmx_dmat;
3665 
3666 	bus_dmamap_unload(t, dma->dma_map);
3667 	bus_dmamap_destroy(t, dma->dma_map);
3668 	bus_dmamem_unmap(t, dma->dma_vaddr, dma->dma_size);
3669 	bus_dmamem_free(t, dma->dma_segs, 1);
3670 
3671 	memset(dma, 0, sizeof(*dma));
3672 }
3673 
3674 MODULE(MODULE_CLASS_DRIVER, if_vmx, "pci");
3675 
3676 #ifdef _MODULE
3677 #include "ioconf.c"
3678 #endif
3679 
3680 static int
3681 if_vmx_modcmd(modcmd_t cmd, void *opaque)
3682 {
3683 	int error = 0;
3684 
3685 	switch (cmd) {
3686 	case MODULE_CMD_INIT:
3687 #ifdef _MODULE
3688 		error = config_init_component(cfdriver_ioconf_if_vmx,
3689 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3690 #endif
3691 		return error;
3692 	case MODULE_CMD_FINI:
3693 #ifdef _MODULE
3694 		error = config_fini_component(cfdriver_ioconf_if_vmx,
3695 		    cfattach_ioconf_if_vmx, cfdata_ioconf_if_vmx);
3696 #endif
3697 		return error;
3698 	default:
3699 		return ENOTTY;
3700 	}
3701 }
3702 
3703