xref: /netbsd-src/sys/dev/pci/if_vioif.c (revision 2718af68c3efc72c9769069b5c7f9ed36f6b9def)
1 /*	$NetBSD: if_vioif.c,v 1.80 2022/04/16 18:15:22 andvar Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.80 2022/04/16 18:15:22 andvar Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_net_mpsafe.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/atomic.h>
40 #include <sys/bus.h>
41 #include <sys/condvar.h>
42 #include <sys/device.h>
43 #include <sys/evcnt.h>
44 #include <sys/intr.h>
45 #include <sys/kmem.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/sockio.h>
49 #include <sys/syslog.h>
50 #include <sys/cpu.h>
51 #include <sys/module.h>
52 #include <sys/pcq.h>
53 #include <sys/workqueue.h>
54 
55 #include <dev/pci/virtioreg.h>
56 #include <dev/pci/virtiovar.h>
57 
58 #include <net/if.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_ether.h>
62 
63 #include <net/bpf.h>
64 
65 #include "ioconf.h"
66 
67 #ifdef NET_MPSAFE
68 #define VIOIF_MPSAFE	1
69 #define VIOIF_MULTIQ	1
70 #endif
71 
72 /*
73  * if_vioifreg.h:
74  */
75 /* Configuration registers */
76 #define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
77 #define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
78 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
79 #define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
80 
81 /* Feature bits */
82 #define VIRTIO_NET_F_CSUM		__BIT(0)
83 #define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
84 #define VIRTIO_NET_F_MAC		__BIT(5)
85 #define VIRTIO_NET_F_GSO		__BIT(6)
86 #define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
87 #define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
88 #define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
89 #define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
90 #define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
91 #define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
92 #define VIRTIO_NET_F_HOST_ECN		__BIT(13)
93 #define VIRTIO_NET_F_HOST_UFO		__BIT(14)
94 #define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
95 #define VIRTIO_NET_F_STATUS		__BIT(16)
96 #define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
97 #define VIRTIO_NET_F_CTRL_RX		__BIT(18)
98 #define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
99 #define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
100 #define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
101 #define VIRTIO_NET_F_MQ			__BIT(22)
102 #define VIRTIO_NET_F_CTRL_MAC_ADDR	__BIT(23)
103 
104 #define VIRTIO_NET_FLAG_BITS			\
105 	VIRTIO_COMMON_FLAG_BITS			\
106 	"b\x17" "CTRL_MAC\0"			\
107 	"b\x16" "MQ\0"				\
108 	"b\x15" "GUEST_ANNOUNCE\0"		\
109 	"b\x14" "CTRL_RX_EXTRA\0"		\
110 	"b\x13" "CTRL_VLAN\0"			\
111 	"b\x12" "CTRL_RX\0"			\
112 	"b\x11" "CTRL_VQ\0"			\
113 	"b\x10" "STATUS\0"			\
114 	"b\x0f" "MRG_RXBUF\0"			\
115 	"b\x0e" "HOST_UFO\0"			\
116 	"b\x0d" "HOST_ECN\0"			\
117 	"b\x0c" "HOST_TSO6\0"			\
118 	"b\x0b" "HOST_TSO4\0"			\
119 	"b\x0a" "GUEST_UFO\0"			\
120 	"b\x09" "GUEST_ECN\0"			\
121 	"b\x08" "GUEST_TSO6\0"			\
122 	"b\x07" "GUEST_TSO4\0"			\
123 	"b\x06" "GSO\0"				\
124 	"b\x05" "MAC\0"				\
125 	"b\x01" "GUEST_CSUM\0"			\
126 	"b\x00" "CSUM\0"
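/*
 * The string above, combined with VIRTIO_COMMON_FLAG_BITS, is an
 * snprintb(9) bit-description format string.  As an illustrative
 * sketch only -- not code from this driver -- a feature mask could be
 * rendered for a debug message roughly like this, assuming a
 * struct virtio_softc *vsc and a device_t self are in scope:
 *
 *	char featbuf[256];
 *
 *	snprintb(featbuf, sizeof(featbuf), VIRTIO_NET_FLAG_BITS,
 *	    virtio_features(vsc));
 *	aprint_debug_dev(self, "features: %s\n", featbuf);
 *
 * In vioif_attach() the string is simply handed to
 * virtio_child_attach_start(), which reports the negotiated features.
 */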
127 
128 /* Status */
129 #define VIRTIO_NET_S_LINK_UP	1
130 
131 /* Packet header structure */
132 struct virtio_net_hdr {
133 	uint8_t		flags;
134 	uint8_t		gso_type;
135 	uint16_t	hdr_len;
136 	uint16_t	gso_size;
137 	uint16_t	csum_start;
138 	uint16_t	csum_offset;
139 
140 	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
141 } __packed;
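/*
 * Size note: the full header above is 12 bytes; the legacy layout used
 * when neither VIRTIO_NET_F_MRG_RXBUF nor VIRTIO_F_VERSION_1 is
 * negotiated omits num_buffers and is 10 bytes.  vioif_attach() picks
 * between the two via sc_hdr_size (sizeof vs. offsetof(num_buffers)).
 */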
142 
143 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
144 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
145 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
146 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
147 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
148 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
149 
150 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
151 
152 /* Control virtqueue */
153 struct virtio_net_ctrl_cmd {
154 	uint8_t	class;
155 	uint8_t	command;
156 } __packed;
157 #define VIRTIO_NET_CTRL_RX		0
158 # define VIRTIO_NET_CTRL_RX_PROMISC	0
159 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
160 
161 #define VIRTIO_NET_CTRL_MAC		1
162 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
163 # define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
164 
165 #define VIRTIO_NET_CTRL_VLAN		2
166 # define VIRTIO_NET_CTRL_VLAN_ADD	0
167 # define VIRTIO_NET_CTRL_VLAN_DEL	1
168 
169 #define VIRTIO_NET_CTRL_MQ			4
170 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
171 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
172 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
173 
174 struct virtio_net_ctrl_status {
175 	uint8_t	ack;
176 } __packed;
177 #define VIRTIO_NET_OK			0
178 #define VIRTIO_NET_ERR			1
179 
180 struct virtio_net_ctrl_rx {
181 	uint8_t	onoff;
182 } __packed;
183 
184 struct virtio_net_ctrl_mac_tbl {
185 	uint32_t nentries;
186 	uint8_t macs[][ETHER_ADDR_LEN];
187 } __packed;
188 
189 struct virtio_net_ctrl_mac_addr {
190 	uint8_t mac[ETHER_ADDR_LEN];
191 } __packed;
192 
193 struct virtio_net_ctrl_vlan {
194 	uint16_t id;
195 } __packed;
196 
197 struct virtio_net_ctrl_mq {
198 	uint16_t virtqueue_pairs;
199 } __packed;
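/*
 * Control requests built from the structures above are framed as one
 * descriptor chain: the class/command pair first, then any
 * class-specific payload (an on/off byte, a MAC table, a VLAN id or a
 * queue-pair count), and finally the one-byte status the device fills
 * in with VIRTIO_NET_OK or VIRTIO_NET_ERR.  This is a summary of the
 * virtio-net control-queue protocol, not code; the corresponding
 * helpers appear further down (vioif_ctrl_acquire() and friends).
 */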
200 
201 /*
202  * if_vioifvar.h:
203  */
204 
205 /*
206  * Locking notes:
207  * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
208  *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
209  *      - no more than one of these locks may be held at once
210  * + ctrlq_inuse is protected by ctrlq_wait_lock.
211  *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
212  *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
213  * + fields in vioif_softc except queues are protected by
214  *   sc->sc_lock (an adaptive mutex)
215  *      - sc_lock is acquired before any of the other locks
216  */
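/*
 * Illustrative ordering sketch (not a fragment of this driver): when
 * sc_lock is needed it is taken first, then at most one of the queue
 * locks, and ctrlq_wait_lock only while no queue lock is held.
 *
 *	mutex_enter(&sc->sc_lock);
 *	mutex_enter(txq->txq_lock);
 *	... update per-queue state ...
 *	mutex_exit(txq->txq_lock);
 *	mutex_exit(&sc->sc_lock);
 */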
217 
218 struct vioif_ctrl_cmdspec {
219 	bus_dmamap_t	dmamap;
220 	void		*buf;
221 	bus_size_t	bufsize;
222 };
223 
224 struct vioif_work {
225 	struct work	 cookie;
226 	void		(*func)(void *);
227 	void		*arg;
228 	unsigned int	 added;
229 };
230 
231 struct vioif_txqueue {
232 	kmutex_t		*txq_lock;	/* lock for tx operations */
233 
234 	struct virtqueue	*txq_vq;
235 	bool			txq_stopping;
236 	bool			txq_link_active;
237 	pcq_t			*txq_intrq;
238 
239 	struct virtio_net_hdr	*txq_hdrs;
240 	bus_dmamap_t		*txq_hdr_dmamaps;
241 
242 	struct mbuf		**txq_mbufs;
243 	bus_dmamap_t		*txq_dmamaps;
244 
245 	void			*txq_deferred_transmit;
246 	void			*txq_handle_si;
247 	struct vioif_work	 txq_work;
248 	bool			 txq_workqueue;
249 	bool			 txq_active;
250 
251 	char			 txq_evgroup[16];
252 	struct evcnt		 txq_defrag_failed;
253 	struct evcnt		 txq_mbuf_load_failed;
254 	struct evcnt		 txq_enqueue_reserve_failed;
255 };
256 
257 struct vioif_rxqueue {
258 	kmutex_t		*rxq_lock;	/* lock for rx operations */
259 
260 	struct virtqueue	*rxq_vq;
261 	bool			rxq_stopping;
262 
263 	struct virtio_net_hdr	*rxq_hdrs;
264 	bus_dmamap_t		*rxq_hdr_dmamaps;
265 
266 	struct mbuf		**rxq_mbufs;
267 	bus_dmamap_t		*rxq_dmamaps;
268 
269 	void			*rxq_handle_si;
270 	struct vioif_work	 rxq_work;
271 	bool			 rxq_workqueue;
272 	bool			 rxq_active;
273 
274 	char			 rxq_evgroup[16];
275 	struct evcnt		 rxq_mbuf_add_failed;
276 };
277 
278 struct vioif_ctrlqueue {
279 	struct virtqueue		*ctrlq_vq;
280 	enum {
281 		FREE, INUSE, DONE
282 	}				ctrlq_inuse;
283 	kcondvar_t			ctrlq_wait;
284 	kmutex_t			ctrlq_wait_lock;
285 	struct lwp			*ctrlq_owner;
286 
287 	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
288 	struct virtio_net_ctrl_status	*ctrlq_status;
289 	struct virtio_net_ctrl_rx	*ctrlq_rx;
290 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
291 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
292 	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
293 	struct virtio_net_ctrl_mq	*ctrlq_mq;
294 
295 	bus_dmamap_t			ctrlq_cmd_dmamap;
296 	bus_dmamap_t			ctrlq_status_dmamap;
297 	bus_dmamap_t			ctrlq_rx_dmamap;
298 	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
299 	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
300 	bus_dmamap_t			ctrlq_mac_addr_dmamap;
301 	bus_dmamap_t			ctrlq_mq_dmamap;
302 
303 	struct evcnt			ctrlq_cmd_load_failed;
304 	struct evcnt			ctrlq_cmd_failed;
305 };
306 
307 struct vioif_softc {
308 	device_t		sc_dev;
309 	kmutex_t		sc_lock;
310 	struct sysctllog	*sc_sysctllog;
311 
312 	struct virtio_softc	*sc_virtio;
313 	struct virtqueue	*sc_vqs;
314 	u_int			 sc_hdr_size;
315 
316 	int			sc_max_nvq_pairs;
317 	int			sc_req_nvq_pairs;
318 	int			sc_act_nvq_pairs;
319 
320 	uint8_t			sc_mac[ETHER_ADDR_LEN];
321 	struct ethercom		sc_ethercom;
322 	bool			sc_link_active;
323 
324 	struct vioif_txqueue	*sc_txq;
325 	struct vioif_rxqueue	*sc_rxq;
326 
327 	bool			sc_has_ctrl;
328 	struct vioif_ctrlqueue	sc_ctrlq;
329 
330 	bus_dma_segment_t	sc_hdr_segs[1];
331 	void			*sc_dmamem;
332 	void			*sc_kmem;
333 
334 	void			*sc_ctl_softint;
335 
336 	struct workqueue	*sc_txrx_workqueue;
337 	bool			 sc_txrx_workqueue_sysctl;
338 	u_int			 sc_tx_intr_process_limit;
339 	u_int			 sc_tx_process_limit;
340 	u_int			 sc_rx_intr_process_limit;
341 	u_int			 sc_rx_process_limit;
342 };
343 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345 
346 #define VIOIF_TX_INTR_PROCESS_LIMIT	256
347 #define VIOIF_TX_PROCESS_LIMIT		256
348 #define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349 #define VIOIF_RX_PROCESS_LIMIT		256
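/*
 * These limits bound how many slots a single pass of the dequeue loops
 * may process; see the "limit-- == 0" checks in vioif_rx_deq_locked()
 * and vioif_tx_deq_locked().  The rx interrupt limit of 0 therefore
 * makes the hard interrupt dequeue nothing itself and defer all rx
 * processing to the softint or workqueue handler.
 */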
350 
351 #define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352 
353 /* cfattach interface functions */
354 static int	vioif_match(device_t, cfdata_t, void *);
355 static void	vioif_attach(device_t, device_t, void *);
356 static int	vioif_finalize_teardown(device_t);
357 
358 /* ifnet interface functions */
359 static int	vioif_init(struct ifnet *);
360 static void	vioif_stop(struct ifnet *, int);
361 static void	vioif_start(struct ifnet *);
362 static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
363 static int	vioif_transmit(struct ifnet *, struct mbuf *);
364 static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
365 static int	vioif_ioctl(struct ifnet *, u_long, void *);
366 static void	vioif_watchdog(struct ifnet *);
367 static int	vioif_ifflags_cb(struct ethercom *);
368 
369 /* rx */
370 static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
371 static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
372 static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
373 		    struct vioif_rxqueue *);
374 static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
375 static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
376 		    struct vioif_rxqueue *, u_int);
377 static int	vioif_rx_intr(void *);
378 static void	vioif_rx_handle(void *);
379 static void	vioif_rx_sched_handle(struct vioif_softc *,
380 		    struct vioif_rxqueue *);
381 static void	vioif_rx_drain(struct vioif_rxqueue *);
382 
383 /* tx */
384 static int	vioif_tx_intr(void *);
385 static void	vioif_tx_handle(void *);
386 static void	vioif_tx_sched_handle(struct vioif_softc *,
387 		    struct vioif_txqueue *);
388 static void	vioif_tx_queue_clear(struct vioif_txqueue *);
389 static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
390 		    struct vioif_txqueue *, u_int);
391 static void	vioif_tx_drain(struct vioif_txqueue *);
392 static void	vioif_deferred_transmit(void *);
393 
394 /* workqueue */
395 static struct workqueue*
396 		vioif_workq_create(const char *, pri_t, int, int);
397 static void	vioif_workq_destroy(struct workqueue *);
398 static void	vioif_workq_work(struct work *, void *);
399 static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
400 static void	vioif_work_add(struct workqueue *, struct vioif_work *);
401 static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
402 
403 /* other control */
404 static bool	vioif_is_link_up(struct vioif_softc *);
405 static void	vioif_update_link_status(struct vioif_softc *);
406 static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
407 static int	vioif_set_promisc(struct vioif_softc *, bool);
408 static int	vioif_set_allmulti(struct vioif_softc *, bool);
409 static int	vioif_set_rx_filter(struct vioif_softc *);
410 static int	vioif_rx_filter(struct vioif_softc *);
411 static int	vioif_set_mac_addr(struct vioif_softc *);
412 static int	vioif_ctrl_intr(void *);
413 static int	vioif_config_change(struct virtio_softc *);
414 static void	vioif_ctl_softint(void *);
415 static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
416 static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
417 static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
418 static int	vioif_setup_sysctl(struct vioif_softc *);
419 static void	vioif_setup_stats(struct vioif_softc *);
420 static int	vioif_ifflags(struct vioif_softc *);
421 
422 CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
423 		  vioif_match, vioif_attach, NULL, NULL);
424 
425 static int
426 vioif_match(device_t parent, cfdata_t match, void *aux)
427 {
428 	struct virtio_attach_args *va = aux;
429 
430 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
431 		return 1;
432 
433 	return 0;
434 }
435 
436 static int
437 vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
438     bus_size_t size, int nsegs, const char *usage)
439 {
440 	int r;
441 
442 	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
443 	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
444 
445 	if (r != 0) {
446 		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
447 		    "error code %d\n", usage, r);
448 	}
449 
450 	return r;
451 }
452 
453 static void
454 vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
455 {
456 
457 	if (*map) {
458 		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
459 		*map = NULL;
460 	}
461 }
462 
463 static int
464 vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
465     void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
466 {
467 	int r;
468 
469 	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
470 	if (r != 0)
471 		return 1;
472 
473 	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
474 	    size, NULL, rw | BUS_DMA_NOWAIT);
475 	if (r != 0) {
476 		vioif_dmamap_destroy(sc, map);
477 		aprint_error_dev(sc->sc_dev, "%s dmamap load failed, "
478 		    "error code %d\n", usage, r);
479 	}
480 
481 	return r;
482 }
483 
484 static void *
485 vioif_assign_mem(intptr_t *p, size_t size)
486 {
487 	intptr_t rv;
488 
489 	rv = *p;
490 	*p += size;
491 
492 	return (void *)rv;
493 }
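/*
 * Usage sketch (illustrative names only): the cursor starts at the base
 * of one large allocation and is advanced by each call, so successive
 * callers receive adjacent, non-overlapping sub-regions:
 *
 *	intptr_t p = (intptr_t)vaddr;
 *	rxq->rxq_hdrs = vioif_assign_mem(&p, hdr_bytes);
 *	ctrlq->ctrlq_cmd = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_cmd));
 *
 * No padding is inserted between regions, so callers must request sizes
 * that keep the following structure adequately aligned.
 */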
494 
495 static void
496 vioif_alloc_queues(struct vioif_softc *sc)
497 {
498 	int nvq_pairs = sc->sc_max_nvq_pairs;
499 	int nvqs = nvq_pairs * 2;
500 	int i;
501 
502 	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
503 
504 	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
505 	    KM_SLEEP);
506 	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
507 	    KM_SLEEP);
508 
509 	if (sc->sc_has_ctrl)
510 		nvqs++;
511 
512 	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
513 	nvqs = 0;
514 	for (i = 0; i < nvq_pairs; i++) {
515 		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
516 		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
517 	}
518 
519 	if (sc->sc_has_ctrl)
520 		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
521 }
522 
523 static void
524 vioif_free_queues(struct vioif_softc *sc)
525 {
526 	int nvq_pairs = sc->sc_max_nvq_pairs;
527 	int nvqs = nvq_pairs * 2;
528 
529 	if (sc->sc_ctrlq.ctrlq_vq)
530 		nvqs++;
531 
532 	if (sc->sc_txq) {
533 		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
534 		sc->sc_txq = NULL;
535 	}
536 
537 	if (sc->sc_rxq) {
538 		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
539 		sc->sc_rxq = NULL;
540 	}
541 
542 	if (sc->sc_vqs) {
543 		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
544 		sc->sc_vqs = NULL;
545 	}
546 }
547 
548 /* allocate memory */
549 /*
550  * dma memory is used for:
551  *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
552  *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
553  *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
554  *   ctrlq_status:	 return value for a command via ctrl vq (READ)
555  *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
556  *			 (WRITE)
557  *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
558  *			 class command (WRITE)
559  *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
560  *			 class command (WRITE)
561  * Only one instance of each ctrlq_* structure is allocated; they are
562  * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
563  */
564 /*
565  * dynamically allocated memory is used for:
566  *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
567  *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
568  *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
569  *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
570  *   rxq_mbufs[slot]:		mbuf pointer array for received frames
571  *   txq_mbufs[slot]:		mbuf pointer array for sent frames
572  */
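/*
 * Worked example (hypothetical queue sizes, for illustration only):
 * with a single rx/tx pair of 256 entries each, the header portion of
 * the DMA region below is (256 + 256) * sizeof(struct virtio_net_hdr)
 * = 512 * 12 = 6144 bytes; the ctrlq_* structures and the two MAC
 * tables (1 and VIRTIO_NET_CTRL_MAC_MAXENTRIES addresses) are appended
 * when the control queue is available, and everything is carved out of
 * one bus_dmamem_alloc() segment by vioif_assign_mem().
 */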
573 static int
574 vioif_alloc_mems(struct vioif_softc *sc)
575 {
576 	struct virtio_softc *vsc = sc->sc_virtio;
577 	struct vioif_txqueue *txq;
578 	struct vioif_rxqueue *rxq;
579 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
580 	int allocsize, allocsize2, r, rsegs, i, qid;
581 	void *vaddr;
582 	intptr_t p;
583 
584 	allocsize = 0;
585 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
586 		rxq = &sc->sc_rxq[qid];
587 		txq = &sc->sc_txq[qid];
588 
589 		allocsize += sizeof(struct virtio_net_hdr) *
590 			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
591 	}
592 	if (sc->sc_has_ctrl) {
593 		allocsize += sizeof(struct virtio_net_ctrl_cmd);
594 		allocsize += sizeof(struct virtio_net_ctrl_status);
595 		allocsize += sizeof(struct virtio_net_ctrl_rx);
596 		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
597 		    + ETHER_ADDR_LEN;
598 		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
599 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
600 		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
601 		allocsize += sizeof(struct virtio_net_ctrl_mq);
602 	}
603 	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
604 	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
605 	if (r != 0) {
606 		aprint_error_dev(sc->sc_dev,
607 		    "DMA memory allocation failed, size %d, "
608 		    "error code %d\n", allocsize, r);
609 		goto err_none;
610 	}
611 	r = bus_dmamem_map(virtio_dmat(vsc),
612 	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
613 	if (r != 0) {
614 		aprint_error_dev(sc->sc_dev,
615 		    "DMA memory map failed, error code %d\n", r);
616 		goto err_dmamem_alloc;
617 	}
618 
619 	memset(vaddr, 0, allocsize);
620 	sc->sc_dmamem = vaddr;
621 	p = (intptr_t) vaddr;
622 
623 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
624 		rxq = &sc->sc_rxq[qid];
625 		txq = &sc->sc_txq[qid];
626 
627 		rxq->rxq_hdrs = vioif_assign_mem(&p,
628 		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
629 		txq->txq_hdrs = vioif_assign_mem(&p,
630 		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
631 	}
632 	if (sc->sc_has_ctrl) {
633 		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
634 		    sizeof(*ctrlq->ctrlq_cmd));
635 		ctrlq->ctrlq_status = vioif_assign_mem(&p,
636 		    sizeof(*ctrlq->ctrlq_status));
637 		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
638 		    sizeof(*ctrlq->ctrlq_rx));
639 		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
640 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
641 		    + ETHER_ADDR_LEN);
642 		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
643 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
644 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
645 		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
646 		    sizeof(*ctrlq->ctrlq_mac_addr));
647 		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
648 	}
649 
650 	allocsize2 = 0;
651 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
652 		int rxqsize, txqsize;
653 
654 		rxq = &sc->sc_rxq[qid];
655 		txq = &sc->sc_txq[qid];
656 		rxqsize = rxq->rxq_vq->vq_num;
657 		txqsize = txq->txq_vq->vq_num;
658 
659 		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
660 		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
661 		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
662 
663 		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
664 		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
665 		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
666 	}
667 	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
668 	sc->sc_kmem = vaddr;
669 	p = (intptr_t) vaddr;
670 
671 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
672 		int rxqsize, txqsize;
673 		rxq = &sc->sc_rxq[qid];
674 		txq = &sc->sc_txq[qid];
675 		rxqsize = rxq->rxq_vq->vq_num;
676 		txqsize = txq->txq_vq->vq_num;
677 
678 		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
679 		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
680 		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
681 		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
682 		rxq->rxq_dmamaps = vioif_assign_mem(&p,
683 		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
684 		txq->txq_dmamaps = vioif_assign_mem(&p,
685 		    sizeof(txq->txq_dmamaps[0]) * txqsize);
686 		rxq->rxq_mbufs = vioif_assign_mem(&p,
687 		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
688 		txq->txq_mbufs = vioif_assign_mem(&p,
689 		    sizeof(txq->txq_mbufs[0]) * txqsize);
690 	}
691 
692 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
693 		rxq = &sc->sc_rxq[qid];
694 		txq = &sc->sc_txq[qid];
695 
696 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
697 			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
698 			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
699 			    BUS_DMA_READ, "rx header");
700 			if (r != 0)
701 				goto err_reqs;
702 
703 			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
704 			    MCLBYTES, 1, "rx payload");
705 			if (r != 0)
706 				goto err_reqs;
707 		}
708 
709 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
710 			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
711 			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
712 			    BUS_DMA_WRITE, "tx header");
713 			if (r != 0)
714 				goto err_reqs;
715 
716 			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
717 			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
718 			if (r != 0)
719 				goto err_reqs;
720 		}
721 	}
722 
723 	if (sc->sc_has_ctrl) {
724 		/* control vq class & command */
725 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
726 		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
727 		    BUS_DMA_WRITE, "control command");
728 		if (r != 0)
729 			goto err_reqs;
730 
731 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
732 		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
733 		    BUS_DMA_READ, "control status");
734 		if (r != 0)
735 			goto err_reqs;
736 
737 		/* control vq rx mode command parameter */
738 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
739 		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
740 		    BUS_DMA_WRITE, "rx mode control command");
741 		if (r != 0)
742 			goto err_reqs;
743 
744 		/* multiqueue set command */
745 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
746 		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
747 		    BUS_DMA_WRITE, "multiqueue set command");
748 		if (r != 0)
749 			goto err_reqs;
750 
751 		/* control vq MAC filter table for unicast */
752 		/* do not load now since its length is variable */
753 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
754 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
755 		    + ETHER_ADDR_LEN, 1,
756 		    "unicast MAC address filter command");
757 		if (r != 0)
758 			goto err_reqs;
759 
760 		/* control vq MAC filter table for multicast */
761 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
762 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
763 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
764 		    "multicast MAC address filter command");
765 		if (r != 0)
766 			goto err_reqs;
767 
768 		/* control vq MAC address set command */
769 		r = vioif_dmamap_create_load(sc,
770 		    &ctrlq->ctrlq_mac_addr_dmamap,
771 		    ctrlq->ctrlq_mac_addr,
772 		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
773 		    BUS_DMA_WRITE, "mac addr set command");
774 		if (r != 0)
775 			goto err_reqs;
776 	}
777 
778 	return 0;
779 
780 err_reqs:
781 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
782 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
783 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
784 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
785 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
786 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
787 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
788 		rxq = &sc->sc_rxq[qid];
789 		txq = &sc->sc_txq[qid];
790 
791 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
792 			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
793 			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
794 		}
795 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
796 			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
797 			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
798 		}
799 	}
800 	if (sc->sc_kmem) {
801 		kmem_free(sc->sc_kmem, allocsize2);
802 		sc->sc_kmem = NULL;
803 	}
804 	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
805 err_dmamem_alloc:
806 	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
807 err_none:
808 	return -1;
809 }
810 
811 static void
812 vioif_attach(device_t parent, device_t self, void *aux)
813 {
814 	struct vioif_softc *sc = device_private(self);
815 	struct virtio_softc *vsc = device_private(parent);
816 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
817 	struct vioif_txqueue *txq;
818 	struct vioif_rxqueue *rxq;
819 	uint64_t features, req_features;
820 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
821 	u_int softint_flags;
822 	int r, i, nvqs=0, req_flags;
823 	char xnamebuf[MAXCOMLEN];
824 
825 	if (virtio_child(vsc) != NULL) {
826 		aprint_normal(": child already attached for %s; "
827 		    "something wrong...\n", device_xname(parent));
828 		return;
829 	}
830 
831 	sc->sc_dev = self;
832 	sc->sc_virtio = vsc;
833 	sc->sc_link_active = false;
834 
835 	sc->sc_max_nvq_pairs = 1;
836 	sc->sc_req_nvq_pairs = 1;
837 	sc->sc_act_nvq_pairs = 1;
838 	sc->sc_txrx_workqueue_sysctl = true;
839 	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
840 	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
841 	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
842 	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
843 
844 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
845 
846 	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
847 	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
848 	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
849 	if (sc->sc_txrx_workqueue == NULL)
850 		goto err;
851 
852 	req_flags = 0;
853 
854 #ifdef VIOIF_MPSAFE
855 	req_flags |= VIRTIO_F_INTR_MPSAFE;
856 #endif
857 	req_flags |= VIRTIO_F_INTR_MSIX;
858 
859 	req_features =
860 	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
861 	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
862 	req_features |= VIRTIO_F_RING_EVENT_IDX;
863 	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
864 #ifdef VIOIF_MULTIQ
865 	req_features |= VIRTIO_NET_F_MQ;
866 #endif
867 	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
868 	    vioif_config_change, virtio_vq_intrhand, req_flags,
869 	    req_features, VIRTIO_NET_FLAG_BITS);
870 
871 	features = virtio_features(vsc);
872 	if (features == 0)
873 		goto err;
874 
875 	if (features & VIRTIO_NET_F_MAC) {
876 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
877 			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
878 			    VIRTIO_NET_CONFIG_MAC + i);
879 		}
880 	} else {
881 		/* code stolen from sys/net/if_tap.c */
882 		struct timeval tv;
883 		uint32_t ui;
884 		getmicrouptime(&tv);
885 		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
886 		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
887 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
888 			virtio_write_device_config_1(vsc,
889 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
890 		}
891 	}
892 
893 	/* 'Ethernet' is capitalized to match other Ethernet drivers' attach output */
894 	aprint_normal_dev(self, "Ethernet address %s\n",
895 	    ether_sprintf(sc->sc_mac));
896 
897 	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
898 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
899 	} else {
900 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
901 	}
902 
903 	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
904 	    (features & VIRTIO_NET_F_CTRL_RX)) {
905 		sc->sc_has_ctrl = true;
906 
907 		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
908 		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
909 		ctrlq->ctrlq_inuse = FREE;
910 	} else {
911 		sc->sc_has_ctrl = false;
912 	}
913 
914 	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
915 		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
916 		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
917 
918 		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
919 			goto err;
920 
921 		/* Limit the number of queue pairs to use */
922 		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
923 	}
924 
925 	vioif_alloc_queues(sc);
926 	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
927 
928 #ifdef VIOIF_MPSAFE
929 	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
930 #else
931 	softint_flags = SOFTINT_NET;
932 #endif
933 
934 	/*
935 	 * Allocating virtqueues
936 	 */
937 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
938 		rxq = &sc->sc_rxq[i];
939 		txq = &sc->sc_txq[i];
940 		char qname[32];
941 
942 		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
943 
944 		rxq->rxq_handle_si = softint_establish(softint_flags,
945 		    vioif_rx_handle, rxq);
946 		if (rxq->rxq_handle_si == NULL) {
947 			aprint_error_dev(self, "cannot establish rx softint\n");
948 			goto err;
949 		}
950 
951 		snprintf(qname, sizeof(qname), "rx%d", i);
952 		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
953 		    MCLBYTES + sc->sc_hdr_size, 2, qname);
954 		if (r != 0)
955 			goto err;
956 		nvqs++;
957 		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
958 		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
959 		rxq->rxq_stopping = true;
960 		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
961 
962 		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
963 
964 		txq->txq_deferred_transmit = softint_establish(softint_flags,
965 		    vioif_deferred_transmit, txq);
966 		if (txq->txq_deferred_transmit == NULL) {
967 			aprint_error_dev(self, "cannot establish tx softint\n");
968 			goto err;
969 		}
970 		txq->txq_handle_si = softint_establish(softint_flags,
971 		    vioif_tx_handle, txq);
972 		if (txq->txq_handle_si == NULL) {
973 			aprint_error_dev(self, "cannot establish tx softint\n");
974 			goto err;
975 		}
976 
977 		snprintf(qname, sizeof(qname), "tx%d", i);
978 		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
979 		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
980 		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
981 		if (r != 0)
982 			goto err;
983 		nvqs++;
984 		txq->txq_vq->vq_intrhand = vioif_tx_intr;
985 		txq->txq_vq->vq_intrhand_arg = (void *)txq;
986 		txq->txq_link_active = sc->sc_link_active;
987 		txq->txq_stopping = false;
988 		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
989 		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
990 	}
991 
992 	if (sc->sc_has_ctrl) {
993 		/*
994 		 * Allocating a virtqueue for control channel
995 		 */
996 		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
997 		    NBPG, 1, "control");
998 		if (r != 0) {
999 			aprint_error_dev(self, "failed to allocate "
1000 			    "a virtqueue for control channel, error code %d\n",
1001 			    r);
1002 
1003 			sc->sc_has_ctrl = false;
1004 			cv_destroy(&ctrlq->ctrlq_wait);
1005 			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1006 		} else {
1007 			nvqs++;
1008 			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1009 			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1010 		}
1011 	}
1012 
1013 	sc->sc_ctl_softint = softint_establish(softint_flags,
1014 	    vioif_ctl_softint, sc);
1015 	if (sc->sc_ctl_softint == NULL) {
1016 		aprint_error_dev(self, "cannot establish ctl softint\n");
1017 		goto err;
1018 	}
1019 
1020 	if (vioif_alloc_mems(sc) < 0)
1021 		goto err;
1022 
1023 	if (virtio_child_attach_finish(vsc) != 0)
1024 		goto err;
1025 
1026 	if (vioif_setup_sysctl(sc) != 0) {
1027 		aprint_error_dev(self, "unable to create sysctl node\n");
1028 		/* continue */
1029 	}
1030 
1031 	vioif_setup_stats(sc);
1032 
1033 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1034 	ifp->if_softc = sc;
1035 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1036 #ifdef VIOIF_MPSAFE
1037 	ifp->if_extflags = IFEF_MPSAFE;
1038 #endif
1039 	ifp->if_start = vioif_start;
1040 	if (sc->sc_req_nvq_pairs > 1)
1041 		ifp->if_transmit = vioif_transmit;
1042 	ifp->if_ioctl = vioif_ioctl;
1043 	ifp->if_init = vioif_init;
1044 	ifp->if_stop = vioif_stop;
1045 	ifp->if_capabilities = 0;
1046 	ifp->if_watchdog = vioif_watchdog;
1047 	txq = &sc->sc_txq[0];
1048 	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1049 	IFQ_SET_READY(&ifp->if_snd);
1050 
1051 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1052 
1053 	if_attach(ifp);
1054 	if_deferred_start_init(ifp, NULL);
1055 	ether_ifattach(ifp, sc->sc_mac);
1056 	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1057 
1058 	return;
1059 
1060 err:
1061 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1062 		rxq = &sc->sc_rxq[i];
1063 		txq = &sc->sc_txq[i];
1064 
1065 		if (rxq->rxq_lock) {
1066 			mutex_obj_free(rxq->rxq_lock);
1067 			rxq->rxq_lock = NULL;
1068 		}
1069 
1070 		if (rxq->rxq_handle_si) {
1071 			softint_disestablish(rxq->rxq_handle_si);
1072 			rxq->rxq_handle_si = NULL;
1073 		}
1074 
1075 		if (txq->txq_lock) {
1076 			mutex_obj_free(txq->txq_lock);
1077 			txq->txq_lock = NULL;
1078 		}
1079 
1080 		if (txq->txq_handle_si) {
1081 			softint_disestablish(txq->txq_handle_si);
1082 			txq->txq_handle_si = NULL;
1083 		}
1084 
1085 		if (txq->txq_deferred_transmit) {
1086 			softint_disestablish(txq->txq_deferred_transmit);
1087 			txq->txq_deferred_transmit = NULL;
1088 		}
1089 
1090 		if (txq->txq_intrq) {
1091 			pcq_destroy(txq->txq_intrq);
1092 			txq->txq_intrq = NULL;
1093 		}
1094 	}
1095 
1096 	if (sc->sc_has_ctrl) {
1097 		cv_destroy(&ctrlq->ctrlq_wait);
1098 		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1099 	}
1100 
1101 	while (nvqs > 0)
1102 		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1103 
1104 	vioif_free_queues(sc);
1105 	mutex_destroy(&sc->sc_lock);
1106 	virtio_child_attach_failed(vsc);
1107 	config_finalize_register(self, vioif_finalize_teardown);
1108 
1109 	return;
1110 }
1111 
1112 static int
1113 vioif_finalize_teardown(device_t self)
1114 {
1115 	struct vioif_softc *sc = device_private(self);
1116 
1117 	if (sc->sc_txrx_workqueue != NULL) {
1118 		vioif_workq_destroy(sc->sc_txrx_workqueue);
1119 		sc->sc_txrx_workqueue = NULL;
1120 	}
1121 
1122 	return 0;
1123 }
1124 
1125 static void
1126 vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1127 {
1128 	struct virtio_softc *vsc = sc->sc_virtio;
1129 	struct vioif_txqueue *txq;
1130 	struct vioif_rxqueue *rxq;
1131 	int i;
1132 
1133 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1134 		txq = &sc->sc_txq[i];
1135 		rxq = &sc->sc_rxq[i];
1136 
1137 		virtio_start_vq_intr(vsc, txq->txq_vq);
1138 		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1139 	}
1140 }
1141 
1142 static void
1143 vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1144 {
1145 	struct virtio_softc *vsc = sc->sc_virtio;
1146 	struct vioif_txqueue *txq;
1147 	struct vioif_rxqueue *rxq;
1148 	int i;
1149 
1150 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1151 		rxq = &sc->sc_rxq[i];
1152 		txq = &sc->sc_txq[i];
1153 
1154 		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1155 		virtio_stop_vq_intr(vsc, txq->txq_vq);
1156 	}
1157 }
1158 
1159 /*
1160  * Interface functions for ifnet
1161  */
1162 static int
1163 vioif_init(struct ifnet *ifp)
1164 {
1165 	struct vioif_softc *sc = ifp->if_softc;
1166 	struct virtio_softc *vsc = sc->sc_virtio;
1167 	struct vioif_rxqueue *rxq;
1168 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1169 	int r, i;
1170 
1171 	vioif_stop(ifp, 0);
1172 
1173 	r = virtio_reinit_start(vsc);
1174 	if (r != 0) {
1175 		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1176 		return EIO;
1177 	}
1178 
1179 	virtio_negotiate_features(vsc, virtio_features(vsc));
1180 
1181 	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1182 		rxq = &sc->sc_rxq[i];
1183 
1184 		/* rxq_stopping must be set false before vioif_populate_rx_mbufs_locked */
1185 		mutex_enter(rxq->rxq_lock);
1186 		rxq->rxq_stopping = false;
1187 		vioif_populate_rx_mbufs_locked(sc, rxq);
1188 		mutex_exit(rxq->rxq_lock);
1189 
1190 	}
1191 
1192 	virtio_reinit_end(vsc);
1193 
1194 	if (sc->sc_has_ctrl)
1195 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1196 
1197 	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1198 	if (r == 0)
1199 		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1200 	else
1201 		sc->sc_act_nvq_pairs = 1;
1202 
1203 	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1204 		sc->sc_txq[i].txq_stopping = false;
1205 
1206 	vioif_enable_interrupt_vqpairs(sc);
1207 
1208 	vioif_update_link_status(sc);
1209 	ifp->if_flags |= IFF_RUNNING;
1210 	ifp->if_flags &= ~IFF_OACTIVE;
1211 	r = vioif_rx_filter(sc);
1212 
1213 	return r;
1214 }
1215 
1216 static void
1217 vioif_stop(struct ifnet *ifp, int disable)
1218 {
1219 	struct vioif_softc *sc = ifp->if_softc;
1220 	struct virtio_softc *vsc = sc->sc_virtio;
1221 	struct vioif_txqueue *txq;
1222 	struct vioif_rxqueue *rxq;
1223 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1224 	int i;
1225 
1226 	/* disable interrupts */
1227 	vioif_disable_interrupt_vqpairs(sc);
1228 	if (sc->sc_has_ctrl)
1229 		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1230 
1231 	/*
1232 	 * stop all packet processing:
1233 	 * 1. stop the interrupt handlers via rxq_stopping and txq_stopping
1234 	 * 2. wait for the packet-processing work on the workqueue to finish
1235 	 */
1236 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1237 		txq = &sc->sc_txq[i];
1238 		rxq = &sc->sc_rxq[i];
1239 
1240 		mutex_enter(rxq->rxq_lock);
1241 		rxq->rxq_stopping = true;
1242 		mutex_exit(rxq->rxq_lock);
1243 		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1244 
1245 		mutex_enter(txq->txq_lock);
1246 		txq->txq_stopping = true;
1247 		mutex_exit(txq->txq_lock);
1248 		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1249 	}
1250 
1251 	/* only way to stop I/O and DMA is resetting... */
1252 	virtio_reset(vsc);
1253 
1254 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1255 		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1256 		vioif_tx_queue_clear(&sc->sc_txq[i]);
1257 	}
1258 
1259 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1260 	sc->sc_link_active = false;
1261 
1262 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1263 		txq = &sc->sc_txq[i];
1264 		rxq = &sc->sc_rxq[i];
1265 
1266 		txq->txq_link_active = false;
1267 
1268 		if (disable)
1269 			vioif_rx_drain(rxq);
1270 
1271 		vioif_tx_drain(txq);
1272 	}
1273 }
1274 
1275 static void
1276 vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1277     bool is_transmit)
1278 {
1279 	struct vioif_softc *sc = ifp->if_softc;
1280 	struct virtio_softc *vsc = sc->sc_virtio;
1281 	struct virtqueue *vq = txq->txq_vq;
1282 	struct virtio_net_hdr *hdr;
1283 	struct mbuf *m;
1284 	int queued = 0;
1285 
1286 	KASSERT(mutex_owned(txq->txq_lock));
1287 
1288 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1289 		return;
1290 
1291 	if (!txq->txq_link_active || txq->txq_stopping)
1292 		return;
1293 
1294 	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1295 		return;
1296 
1297 	for (;;) {
1298 		int slot, r;
1299 
1300 		if (is_transmit)
1301 			m = pcq_get(txq->txq_intrq);
1302 		else
1303 			IFQ_DEQUEUE(&ifp->if_snd, m);
1304 
1305 		if (m == NULL)
1306 			break;
1307 
1308 		r = virtio_enqueue_prep(vsc, vq, &slot);
1309 		if (r == EAGAIN) {
1310 			ifp->if_flags |= IFF_OACTIVE;
1311 			m_freem(m);
1312 			break;
1313 		}
1314 		if (r != 0)
1315 			panic("enqueue_prep for a tx buffer");
1316 
1317 		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1318 		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1319 		if (r != 0) {
1320 			/* maybe just too fragmented */
1321 			struct mbuf *newm;
1322 
1323 			newm = m_defrag(m, M_NOWAIT);
1324 			if (newm == NULL) {
1325 				txq->txq_defrag_failed.ev_count++;
1326 				goto skip;
1327 			}
1328 
1329 			m = newm;
1330 			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1331 			    txq->txq_dmamaps[slot], m,
1332 			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1333 			if (r != 0) {
1334 				txq->txq_mbuf_load_failed.ev_count++;
1335 skip:
1336 				m_freem(m);
1337 				virtio_enqueue_abort(vsc, vq, slot);
1338 				continue;
1339 			}
1340 		}
1341 
1342 		/* This should actually never fail */
1343 		r = virtio_enqueue_reserve(vsc, vq, slot,
1344 		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1345 		if (r != 0) {
1346 			txq->txq_enqueue_reserve_failed.ev_count++;
1347 			bus_dmamap_unload(virtio_dmat(vsc),
1348 			     txq->txq_dmamaps[slot]);
1349 			/* slot already freed by virtio_enqueue_reserve */
1350 			m_freem(m);
1351 			continue;
1352 		}
1353 
1354 		txq->txq_mbufs[slot] = m;
1355 
1356 		hdr = &txq->txq_hdrs[slot];
1357 		memset(hdr, 0, sc->sc_hdr_size);
1358 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1359 		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1360 		    BUS_DMASYNC_PREWRITE);
1361 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1362 		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1363 		    BUS_DMASYNC_PREWRITE);
1364 		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1365 		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1366 		virtio_enqueue_commit(vsc, vq, slot, false);
1367 
1368 		queued++;
1369 		bpf_mtap(ifp, m, BPF_D_OUT);
1370 	}
1371 
1372 	if (queued > 0) {
1373 		virtio_enqueue_commit(vsc, vq, -1, true);
1374 		ifp->if_timer = 5;
1375 	}
1376 }
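/*
 * Each packet sent above occupies one descriptor for the (zero-filled)
 * virtio_net_hdr plus one descriptor per DMA segment of the mbuf chain,
 * hence the virtio_enqueue_reserve() call for dm_nsegs + 1 slots and
 * the VIRTIO_NET_TX_MAXNSEGS + 1 segment count used when the tx
 * virtqueues are allocated in vioif_attach().
 */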
1377 
1378 static void
1379 vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1380 {
1381 
1382 	/*
1383 	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1384 	 */
1385 	vioif_send_common_locked(ifp, txq, false);
1386 
1387 }
1388 
1389 static void
1390 vioif_start(struct ifnet *ifp)
1391 {
1392 	struct vioif_softc *sc = ifp->if_softc;
1393 	struct vioif_txqueue *txq = &sc->sc_txq[0];
1394 
1395 #ifdef VIOIF_MPSAFE
1396 	KASSERT(if_is_mpsafe(ifp));
1397 #endif
1398 
1399 	mutex_enter(txq->txq_lock);
1400 	vioif_start_locked(ifp, txq);
1401 	mutex_exit(txq->txq_lock);
1402 }
1403 
1404 static inline int
1405 vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1406 {
1407 	struct vioif_softc *sc = ifp->if_softc;
1408 	u_int cpuid = cpu_index(curcpu());
1409 
1410 	return cpuid % sc->sc_act_nvq_pairs;
1411 }
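/*
 * For example (illustrative numbers): with 4 CPUs and 2 active queue
 * pairs, transmits issued on CPUs 0 and 2 use tx queue 0 while CPUs 1
 * and 3 use tx queue 1; cpu_index() is stable, so a given CPU always
 * maps to the same queue.
 */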
1412 
1413 static void
1414 vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1415 {
1416 
1417 	vioif_send_common_locked(ifp, txq, true);
1418 }
1419 
1420 static int
1421 vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1422 {
1423 	struct vioif_softc *sc = ifp->if_softc;
1424 	struct vioif_txqueue *txq;
1425 	int qid;
1426 
1427 	qid = vioif_select_txqueue(ifp, m);
1428 	txq = &sc->sc_txq[qid];
1429 
1430 	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1431 		m_freem(m);
1432 		return ENOBUFS;
1433 	}
1434 
1435 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1436 	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1437 	if (m->m_flags & M_MCAST)
1438 		if_statinc_ref(nsr, if_omcasts);
1439 	IF_STAT_PUTREF(ifp);
1440 
1441 	if (mutex_tryenter(txq->txq_lock)) {
1442 		vioif_transmit_locked(ifp, txq);
1443 		mutex_exit(txq->txq_lock);
1444 	}
1445 
1446 	return 0;
1447 }
1448 
1449 static void
1450 vioif_deferred_transmit(void *arg)
1451 {
1452 	struct vioif_txqueue *txq = arg;
1453 	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1454 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1455 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1456 
1457 	mutex_enter(txq->txq_lock);
1458 	vioif_send_common_locked(ifp, txq, true);
1459 	mutex_exit(txq->txq_lock);
1460 }
1461 
1462 static int
1463 vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1464 {
1465 	int s, r;
1466 
1467 	s = splnet();
1468 
1469 	r = ether_ioctl(ifp, cmd, data);
1470 	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1471 		if (ifp->if_flags & IFF_RUNNING) {
1472 			r = vioif_rx_filter(ifp->if_softc);
1473 		} else {
1474 			r = 0;
1475 		}
1476 	}
1477 
1478 	splx(s);
1479 
1480 	return r;
1481 }
1482 
1483 static void
1484 vioif_watchdog(struct ifnet *ifp)
1485 {
1486 	struct vioif_softc *sc = ifp->if_softc;
1487 	int i;
1488 
1489 	if (ifp->if_flags & IFF_RUNNING) {
1490 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1491 			vioif_tx_queue_clear(&sc->sc_txq[i]);
1492 		}
1493 	}
1494 }
1495 
1496 /*
1497  * Receive implementation
1498  */
1499 /* allocate and initialize a mbuf for receive */
1500 static int
1501 vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1502 {
1503 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1504 	struct mbuf *m;
1505 	int r;
1506 
1507 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1508 	if (m == NULL)
1509 		return ENOBUFS;
1510 	MCLGET(m, M_DONTWAIT);
1511 	if ((m->m_flags & M_EXT) == 0) {
1512 		m_freem(m);
1513 		return ENOBUFS;
1514 	}
1515 	rxq->rxq_mbufs[i] = m;
1516 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1517 	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1518 	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1519 	if (r) {
1520 		m_freem(m);
1521 		rxq->rxq_mbufs[i] = NULL;
1522 		return r;
1523 	}
1524 
1525 	return 0;
1526 }
1527 
1528 /* free a mbuf for receive */
1529 static void
1530 vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1531 {
1532 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1533 
1534 	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1535 	m_freem(rxq->rxq_mbufs[i]);
1536 	rxq->rxq_mbufs[i] = NULL;
1537 }
1538 
1539 /* add mbufs for all the empty receive slots */
1540 static void
1541 vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1542 {
1543 	struct virtqueue *vq = rxq->rxq_vq;
1544 	struct virtio_softc *vsc = vq->vq_owner;
1545 	int i, r, ndone = 0;
1546 
1547 	KASSERT(mutex_owned(rxq->rxq_lock));
1548 
1549 	if (rxq->rxq_stopping)
1550 		return;
1551 
1552 	for (i = 0; i < vq->vq_num; i++) {
1553 		int slot;
1554 		r = virtio_enqueue_prep(vsc, vq, &slot);
1555 		if (r == EAGAIN)
1556 			break;
1557 		if (r != 0)
1558 			panic("enqueue_prep for rx buffers");
1559 		if (rxq->rxq_mbufs[slot] == NULL) {
1560 			r = vioif_add_rx_mbuf(rxq, slot);
1561 			if (r != 0) {
1562 				rxq->rxq_mbuf_add_failed.ev_count++;
1563 				break;
1564 			}
1565 		}
1566 		r = virtio_enqueue_reserve(vsc, vq, slot,
1567 		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1568 		if (r != 0) {
1569 			vioif_free_rx_mbuf(rxq, slot);
1570 			break;
1571 		}
1572 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1573 		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1574 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1575 		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1576 		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1577 		    false);
1578 		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1579 		virtio_enqueue_commit(vsc, vq, slot, false);
1580 		ndone++;
1581 	}
1582 	if (ndone > 0)
1583 		virtio_enqueue_commit(vsc, vq, -1, true);
1584 }
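/*
 * Each rx slot populated above is a two-descriptor chain: the header
 * dmamap (sc_hdr_size bytes) followed by the payload dmamap (one mbuf
 * cluster of MCLBYTES), matching the segment count of 2 passed to
 * virtio_alloc_vq() for the rx queues in vioif_attach().
 */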
1585 
1586 static void
1587 vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1588 {
1589 	struct virtqueue *vq = rxq->rxq_vq;
1590 	struct virtio_softc *vsc = vq->vq_owner;
1591 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1592 	u_int limit = UINT_MAX;
1593 	bool more;
1594 
1595 	KASSERT(rxq->rxq_stopping);
1596 
1597 	mutex_enter(rxq->rxq_lock);
1598 	for (;;) {
1599 		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1600 		if (more == false)
1601 			break;
1602 	}
1603 	mutex_exit(rxq->rxq_lock);
1604 }
1605 
1606 /* dequeue received packets */
1607 static bool
1608 vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1609     struct vioif_rxqueue *rxq, u_int limit)
1610 {
1611 	struct virtqueue *vq = rxq->rxq_vq;
1612 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1613 	struct mbuf *m;
1614 	int slot, len;
1615 	bool more = false, dequeued = false;
1616 
1617 	KASSERT(mutex_owned(rxq->rxq_lock));
1618 
1619 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1620 		return false;
1621 
1622 	for (;;) {
1623 		if (limit-- == 0) {
1624 			more = true;
1625 			break;
1626 		}
1627 
1628 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1629 			break;
1630 
1631 		dequeued = true;
1632 
1633 		len -= sc->sc_hdr_size;
1634 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1635 		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1636 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1637 		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1638 		m = rxq->rxq_mbufs[slot];
1639 		KASSERT(m != NULL);
1640 		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1641 		rxq->rxq_mbufs[slot] = NULL;
1642 		virtio_dequeue_commit(vsc, vq, slot);
1643 		m_set_rcvif(m, ifp);
1644 		m->m_len = m->m_pkthdr.len = len;
1645 
1646 		mutex_exit(rxq->rxq_lock);
1647 		if_percpuq_enqueue(ifp->if_percpuq, m);
1648 		mutex_enter(rxq->rxq_lock);
1649 
1650 		if (rxq->rxq_stopping)
1651 			break;
1652 	}
1653 
1654 	if (dequeued)
1655 		vioif_populate_rx_mbufs_locked(sc, rxq);
1656 
1657 	return more;
1658 }
1659 
1660 /* rx interrupt; call vioif_rx_deq_locked() above and schedule a softint */
1661 
1662 static void
1663 vioif_rx_handle_locked(void *xrxq, u_int limit)
1664 {
1665 	struct vioif_rxqueue *rxq = xrxq;
1666 	struct virtqueue *vq = rxq->rxq_vq;
1667 	struct virtio_softc *vsc = vq->vq_owner;
1668 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1669 	bool more;
1670 
1671 	KASSERT(!rxq->rxq_stopping);
1672 
1673 	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1674 	if (more) {
1675 		vioif_rx_sched_handle(sc, rxq);
1676 		return;
1677 	}
1678 	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1679 	if (more) {
1680 		vioif_rx_sched_handle(sc, rxq);
1681 		return;
1682 	}
1683 	atomic_store_relaxed(&rxq->rxq_active, false);
1684 }
1685 
1686 static int
1687 vioif_rx_intr(void *arg)
1688 {
1689 	struct vioif_rxqueue *rxq = arg;
1690 	struct virtqueue *vq = rxq->rxq_vq;
1691 	struct virtio_softc *vsc = vq->vq_owner;
1692 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1693 	u_int limit;
1694 
1695 	limit = sc->sc_rx_intr_process_limit;
1696 
1697 	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1698 		return 1;
1699 
1700 	mutex_enter(rxq->rxq_lock);
1701 
1702 	if (!rxq->rxq_stopping) {
1703 		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1704 
1705 		virtio_stop_vq_intr(vsc, vq);
1706 		atomic_store_relaxed(&rxq->rxq_active, true);
1707 
1708 		vioif_rx_handle_locked(rxq, limit);
1709 	}
1710 
1711 	mutex_exit(rxq->rxq_lock);
1712 	return 1;
1713 }
1714 
1715 static void
1716 vioif_rx_handle(void *xrxq)
1717 {
1718 	struct vioif_rxqueue *rxq = xrxq;
1719 	struct virtqueue *vq = rxq->rxq_vq;
1720 	struct virtio_softc *vsc = vq->vq_owner;
1721 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1722 	u_int limit;
1723 
1724 	limit = sc->sc_rx_process_limit;
1725 
1726 	mutex_enter(rxq->rxq_lock);
1727 
1728 	if (!rxq->rxq_stopping)
1729 		vioif_rx_handle_locked(rxq, limit);
1730 
1731 	mutex_exit(rxq->rxq_lock);
1732 }
1733 
1734 static void
1735 vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1736 {
1737 
1738 	KASSERT(mutex_owned(rxq->rxq_lock));
1739 
1740 	if (rxq->rxq_stopping)
1741 		return;
1742 
1743 	if (rxq->rxq_workqueue)
1744 		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1745 	else
1746 		softint_schedule(rxq->rxq_handle_si);
1747 }
1748 
1749 /* free all the mbufs; called from if_stop(disable) */
1750 static void
1751 vioif_rx_drain(struct vioif_rxqueue *rxq)
1752 {
1753 	struct virtqueue *vq = rxq->rxq_vq;
1754 	int i;
1755 
1756 	for (i = 0; i < vq->vq_num; i++) {
1757 		if (rxq->rxq_mbufs[i] == NULL)
1758 			continue;
1759 		vioif_free_rx_mbuf(rxq, i);
1760 	}
1761 }
1762 
1763 /*
1764  * Transmission implementation
1765  */
1766 /* actual transmission is done in if_start */
1767 /* tx interrupt; dequeue and free mbufs */
1768 /*
1769  * tx interrupt is actually disabled; this should be called upon
1770  * tx vq full and watchdog
1771  */
1772 
1773 static void
1774 vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1775 {
1776 	struct virtqueue *vq = txq->txq_vq;
1777 	struct virtio_softc *vsc = vq->vq_owner;
1778 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1779 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1780 	bool more;
1781 
1782 	KASSERT(!txq->txq_stopping);
1783 
1784 	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1785 	if (more) {
1786 		vioif_tx_sched_handle(sc, txq);
1787 		return;
1788 	}
1789 
1790 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1791 		more = virtio_postpone_intr_smart(vsc, vq);
1792 	else
1793 		more = virtio_start_vq_intr(vsc, vq);
1794 	if (more) {
1795 		vioif_tx_sched_handle(sc, txq);
1796 		return;
1797 	}
1798 
1799 	atomic_store_relaxed(&txq->txq_active, false);
1800 	/* for ALTQ */
1801 	if (txq == &sc->sc_txq[0]) {
1802 		if_schedule_deferred_start(ifp);
1803 		ifp->if_flags &= ~IFF_OACTIVE;
1804 	}
1805 	softint_schedule(txq->txq_deferred_transmit);
1806 }
1807 
1808 
1809 static int
1810 vioif_tx_intr(void *arg)
1811 {
1812 	struct vioif_txqueue *txq = arg;
1813 	struct virtqueue *vq = txq->txq_vq;
1814 	struct virtio_softc *vsc = vq->vq_owner;
1815 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1816 	u_int limit;
1817 
1818 	limit = sc->sc_tx_intr_process_limit;
1819 
1820 	if (atomic_load_relaxed(&txq->txq_active) == true)
1821 		return 1;
1822 
1823 	mutex_enter(txq->txq_lock);
1824 
1825 	if (!txq->txq_stopping) {
1826 		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1827 
1828 		virtio_stop_vq_intr(vsc, vq);
1829 		atomic_store_relaxed(&txq->txq_active, true);
1830 
1831 		vioif_tx_handle_locked(txq, limit);
1832 	}
1833 
1834 	mutex_exit(txq->txq_lock);
1835 
1836 	return 1;
1837 }
1838 
1839 static void
1840 vioif_tx_handle(void *xtxq)
1841 {
1842 	struct vioif_txqueue *txq = xtxq;
1843 	struct virtqueue *vq = txq->txq_vq;
1844 	struct virtio_softc *vsc = vq->vq_owner;
1845 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1846 	u_int limit;
1847 
1848 	limit = sc->sc_tx_process_limit;
1849 
1850 	mutex_enter(txq->txq_lock);
1851 	if (!txq->txq_stopping)
1852 		vioif_tx_handle_locked(txq, limit);
1853 	mutex_exit(txq->txq_lock);
1854 }
1855 
1856 static void
1857 vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1858 {
1859 
1860 	KASSERT(mutex_owned(txq->txq_lock));
1861 
1862 	if (txq->txq_stopping)
1863 		return;
1864 
1865 	if (txq->txq_workqueue)
1866 		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1867 	else
1868 		softint_schedule(txq->txq_handle_si);
1869 }
1870 
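/* reclaim all completed tx slots, ignoring the processing limit */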
1871 static void
1872 vioif_tx_queue_clear(struct vioif_txqueue *txq)
1873 {
1874 	struct virtqueue *vq = txq->txq_vq;
1875 	struct virtio_softc *vsc = vq->vq_owner;
1876 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1877 	u_int limit = UINT_MAX;
1878 	bool more;
1879 
1880 	mutex_enter(txq->txq_lock);
1881 	for (;;) {
1882 		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1883 		if (more == false)
1884 			break;
1885 	}
1886 	mutex_exit(txq->txq_lock);
1887 }
1888 
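/*
 * Dequeue up to "limit" completed tx requests, unload their dmamaps and
 * free the transmitted mbufs; returns true if entries may still remain.
 */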
1889 static bool
1890 vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1891     struct vioif_txqueue *txq, u_int limit)
1892 {
1893 	struct virtqueue *vq = txq->txq_vq;
1894 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1895 	struct mbuf *m;
1896 	int slot, len;
1897 	bool more = false;
1898 
1899 	KASSERT(mutex_owned(txq->txq_lock));
1900 
1901 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1902 		return false;
1903 
1904 	for (;;) {
1905 		if (limit-- == 0) {
1906 			more = true;
1907 			break;
1908 		}
1909 
1910 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1911 			break;
1912 
1913 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1914 		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1915 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1916 		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1917 		    BUS_DMASYNC_POSTWRITE);
1918 		m = txq->txq_mbufs[slot];
1919 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1920 		txq->txq_mbufs[slot] = NULL;
1921 		virtio_dequeue_commit(vsc, vq, slot);
1922 		if_statinc(ifp, if_opackets);
1923 		m_freem(m);
1924 	}
1925 
1926 	return more;
1927 }
1928 
1929 /* free all the mbufs already put on vq; called from if_stop(disable) */
1930 static void
1931 vioif_tx_drain(struct vioif_txqueue *txq)
1932 {
1933 	struct virtqueue *vq = txq->txq_vq;
1934 	struct virtio_softc *vsc = vq->vq_owner;
1935 	int i;
1936 
1937 	KASSERT(txq->txq_stopping);
1938 
1939 	for (i = 0; i < vq->vq_num; i++) {
1940 		if (txq->txq_mbufs[i] == NULL)
1941 			continue;
1942 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1943 		m_freem(txq->txq_mbufs[i]);
1944 		txq->txq_mbufs[i] = NULL;
1945 	}
1946 }
1947 
1948 /*
1949  * Control vq
1950  */
1951 /* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1952 static void
1953 vioif_ctrl_acquire(struct vioif_softc *sc)
1954 {
1955 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1956 
1957 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1958 	while (ctrlq->ctrlq_inuse != FREE)
1959 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1960 	ctrlq->ctrlq_inuse = INUSE;
1961 	ctrlq->ctrlq_owner = curlwp;
1962 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1963 }
1964 
1965 static void
1966 vioif_ctrl_release(struct vioif_softc *sc)
1967 {
1968 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1969 
1970 	KASSERT(ctrlq->ctrlq_inuse != FREE);
1971 	KASSERT(ctrlq->ctrlq_owner == curlwp);
1972 
1973 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1974 	ctrlq->ctrlq_inuse = FREE;
1975 	ctrlq->ctrlq_owner = NULL;
1976 	cv_signal(&ctrlq->ctrlq_wait);
1977 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1978 }
1979 
1980 static int
1981 vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1982     struct vioif_ctrl_cmdspec *specs, int nspecs)
1983 {
1984 	struct virtio_softc *vsc = sc->sc_virtio;
1985 	int i, r, loaded;
1986 
1987 	loaded = 0;
1988 	for (i = 0; i < nspecs; i++) {
1989 		r = bus_dmamap_load(virtio_dmat(vsc),
1990 		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1991 		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1992 		if (r) {
1993 			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1994 			goto err;
1995 		}
1996 		loaded++;
1997 
1998 	}
1999 
2000 	return r;
2001 
2002 err:
2003 	for (i = 0; i < loaded; i++) {
2004 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2005 	}
2006 
2007 	return r;
2008 }
2009 
2010 static void
2011 vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2012     struct vioif_ctrl_cmdspec *specs, int nspecs)
2013 {
2014 	struct virtio_softc *vsc = sc->sc_virtio;
2015 	int i;
2016 
2017 	for (i = 0; i < nspecs; i++) {
2018 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2019 	}
2020 }
2021 
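/*
 * Send one control command: the command header and the data buffers in
 * "specs" are device-readable, the status buffer is device-writable.
 * The caller must have acquired the ctrl vq via vioif_ctrl_acquire().
 * Sleeps until vioif_ctrl_intr() marks the request DONE, then checks
 * the status returned by the device.
 */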
2022 static int
2023 vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2024     struct vioif_ctrl_cmdspec *specs, int nspecs)
2025 {
2026 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2027 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2028 	struct virtio_softc *vsc = sc->sc_virtio;
2029 	int i, r, slot;
2030 
2031 	ctrlq->ctrlq_cmd->class = class;
2032 	ctrlq->ctrlq_cmd->command = cmd;
2033 
2034 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2035 	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2036 	for (i = 0; i < nspecs; i++) {
2037 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2038 		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2039 	}
2040 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2041 	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2042 
2043 	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2044 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2045 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2046 
2047 	r = virtio_enqueue_prep(vsc, vq, &slot);
2048 	if (r != 0)
2049 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2050 	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2051 	if (r != 0)
2052 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2053 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2054 	for (i = 0; i < nspecs; i++) {
2055 		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2056 	}
2057 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2058 	virtio_enqueue_commit(vsc, vq, slot, true);
2059 
2060 	/* wait for done */
2061 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2062 	while (ctrlq->ctrlq_inuse != DONE)
2063 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2064 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2065 	/* already dequeued */
2066 
2067 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2068 	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2069 	for (i = 0; i < nspecs; i++) {
2070 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2071 		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2072 	}
2073 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2074 	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2075 
2076 	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2077 		r = 0;
2078 	else {
2079 		device_printf(sc->sc_dev, "control command failed\n");
2080 		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2081 		r = EIO;
2082 	}
2083 
2084 	return r;
2085 }
2086 
2087 static int
2088 vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2089 {
2090 	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2091 	struct vioif_ctrl_cmdspec specs[1];
2092 	int r;
2093 
2094 	if (!sc->sc_has_ctrl)
2095 		return ENOTSUP;
2096 
2097 	vioif_ctrl_acquire(sc);
2098 
2099 	rx->onoff = onoff;
2100 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2101 	specs[0].buf = rx;
2102 	specs[0].bufsize = sizeof(*rx);
2103 
2104 	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2105 	    specs, __arraycount(specs));
2106 
2107 	vioif_ctrl_release(sc);
2108 	return r;
2109 }
2110 
2111 static int
2112 vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2113 {
2114 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2115 }
2116 
2117 static int
2118 vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2119 {
2120 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2121 }
2122 
2123 /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2124 static int
2125 vioif_set_rx_filter(struct vioif_softc *sc)
2126 {
2127 	/* filter already set in ctrlq->ctrlq_mac_tbl */
2128 	struct virtio_softc *vsc = sc->sc_virtio;
2129 	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2130 	struct vioif_ctrl_cmdspec specs[2];
2131 	int nspecs = __arraycount(specs);
2132 	int r;
2133 
2134 	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2135 	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2136 
2137 	if (!sc->sc_has_ctrl)
2138 		return ENOTSUP;
2139 
2140 	vioif_ctrl_acquire(sc);
2141 
2142 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2143 	specs[0].buf = mac_tbl_uc;
2144 	specs[0].bufsize = sizeof(*mac_tbl_uc)
2145 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2146 
2147 	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2148 	specs[1].buf = mac_tbl_mc;
2149 	specs[1].bufsize = sizeof(*mac_tbl_mc)
2150 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2151 
2152 	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2153 	if (r != 0)
2154 		goto out;
2155 
2156 	r = vioif_ctrl_send_command(sc,
2157 	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2158 	    specs, nspecs);
2159 
2160 	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2161 
2162 out:
2163 	vioif_ctrl_release(sc);
2164 
2165 	return r;
2166 }
2167 
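/*
 * Program the interface MAC address into the device (no-op if it has not
 * changed): via the VIRTIO_NET_CTRL_MAC_ADDR_SET command when that
 * feature is negotiated, otherwise by writing the bytes directly into
 * the device configuration space.
 */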
2168 static int
2169 vioif_set_mac_addr(struct vioif_softc *sc)
2170 {
2171 	struct virtio_net_ctrl_mac_addr *ma =
2172 	    sc->sc_ctrlq.ctrlq_mac_addr;
2173 	struct vioif_ctrl_cmdspec specs[1];
2174 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2175 	int nspecs = __arraycount(specs);
2176 	uint64_t features;
2177 	int r;
2178 	size_t i;
2179 
2180 	if (!sc->sc_has_ctrl)
2181 		return ENOTSUP;
2182 
2183 	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2184 	    ETHER_ADDR_LEN) == 0) {
2185 		return 0;
2186 	}
2187 
2188 	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2189 
2190 	features = virtio_features(sc->sc_virtio);
2191 	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2192 		vioif_ctrl_acquire(sc);
2193 
2194 		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2195 		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2196 		specs[0].buf = ma;
2197 		specs[0].bufsize = sizeof(*ma);
2198 
2199 		r = vioif_ctrl_send_command(sc,
2200 		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2201 		    specs, nspecs);
2202 
2203 		vioif_ctrl_release(sc);
2204 	} else {
2205 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2206 			virtio_write_device_config_1(sc->sc_virtio,
2207 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2208 		}
2209 		r = 0;
2210 	}
2211 
2212 	return r;
2213 }
2214 
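/* tell the device how many tx/rx virtqueue pairs to use (multiqueue) */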
2215 static int
2216 vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2217 {
2218 	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2219 	struct vioif_ctrl_cmdspec specs[1];
2220 	int r;
2221 
2222 	if (!sc->sc_has_ctrl)
2223 		return ENOTSUP;
2224 
2225 	if (nvq_pairs <= 1)
2226 		return EINVAL;
2227 
2228 	vioif_ctrl_acquire(sc);
2229 
2230 	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2231 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2232 	specs[0].buf = mq;
2233 	specs[0].bufsize = sizeof(*mq);
2234 
2235 	r = vioif_ctrl_send_command(sc,
2236 	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2237 	    specs, __arraycount(specs));
2238 
2239 	vioif_ctrl_release(sc);
2240 
2241 	return r;
2242 }
2243 
2244 /* ctrl vq interrupt; wake up the command issuer */
2245 static int
2246 vioif_ctrl_intr(void *arg)
2247 {
2248 	struct vioif_ctrlqueue *ctrlq = arg;
2249 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2250 	struct virtio_softc *vsc = vq->vq_owner;
2251 	int r, slot;
2252 
2253 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2254 		return 0;
2255 
2256 	r = virtio_dequeue(vsc, vq, &slot, NULL);
2257 	if (r == ENOENT)
2258 		return 0;
2259 	virtio_dequeue_commit(vsc, vq, slot);
2260 
2261 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2262 	ctrlq->ctrlq_inuse = DONE;
2263 	cv_signal(&ctrlq->ctrlq_wait);
2264 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2265 
2266 	return 1;
2267 }
2268 
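/* propagate IFF_PROMISC and IFF_ALLMULTI to the device via the ctrl vq */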
2269 static int
2270 vioif_ifflags(struct vioif_softc *sc)
2271 {
2272 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2273 	bool onoff;
2274 	int r;
2275 
2276 	if (!sc->sc_has_ctrl) {
2277 		/* no ctrl vq; always promisc and allmulti */
2278 		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2279 		return 0;
2280 	}
2281 
2282 	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2283 	r = vioif_set_allmulti(sc, onoff);
2284 	if (r != 0) {
2285 		log(LOG_WARNING,
2286 		    "%s: couldn't %sable ALLMULTI\n",
2287 		    ifp->if_xname, onoff ? "en" : "dis");
2288 		if (onoff == false) {
2289 			ifp->if_flags |= IFF_ALLMULTI;
2290 		}
2291 	}
2292 
2293 	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2294 	r = vioif_set_promisc(sc, onoff);
2295 	if (r != 0) {
2296 		log(LOG_WARNING,
2297 		    "%s: couldn't %sable PROMISC\n",
2298 		    ifp->if_xname, onoff ? "en" : "dis");
2299 		if (onoff == false) {
2300 			ifp->if_flags |= IFF_PROMISC;
2301 		}
2302 	}
2303 
2304 	return 0;
2305 }
2306 
2307 static int
2308 vioif_ifflags_cb(struct ethercom *ec)
2309 {
2310 	struct ifnet *ifp = &ec->ec_if;
2311 	struct vioif_softc *sc = ifp->if_softc;
2312 
2313 	return vioif_ifflags(sc);
2314 }
2315 
2316 /*
2317  * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
2318  * If the multicast filter is too large, use ALLMULTI instead.
2319  * If setting the rx filter fails, fall back to ALLMULTI.
2320  */
2321 static int
2322 vioif_rx_filter(struct vioif_softc *sc)
2323 {
2324 	struct virtio_softc *vsc = sc->sc_virtio;
2325 	struct ethercom *ec = &sc->sc_ethercom;
2326 	struct ifnet *ifp = &ec->ec_if;
2327 	struct ether_multi *enm;
2328 	struct ether_multistep step;
2329 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2330 	int nentries;
2331 	bool allmulti = false;
2332 	int r;
2333 
2334 	if (!sc->sc_has_ctrl) {
2335 		goto set_ifflags;
2336 	}
2337 
2338 	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2339 	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2340 
2341 	nentries = 0;
2342 	allmulti = false;
2343 
2344 	ETHER_LOCK(ec);
2345 	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2346 	    ETHER_NEXT_MULTI(step, enm)) {
2347 		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2348 			allmulti = true;
2349 			break;
2350 		}
2351 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2352 			allmulti = true;
2353 			break;
2354 		}
2355 
2356 		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2357 		    enm->enm_addrlo, ETHER_ADDR_LEN);
2358 		nentries++;
2359 	}
2360 	ETHER_UNLOCK(ec);
2361 
2362 	r = vioif_set_mac_addr(sc);
2363 	if (r != 0) {
2364 		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2365 		    ifp->if_xname);
2366 	}
2367 
2368 	if (!allmulti) {
2369 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2370 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2371 		r = vioif_set_rx_filter(sc);
2372 		if (r != 0) {
2373 			allmulti = true; /* fallback */
2374 		}
2375 	}
2376 
2377 	if (allmulti) {
2378 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2379 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2380 		r = vioif_set_rx_filter(sc);
2381 		if (r != 0) {
2382 			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2383 			    ifp->if_xname);
2384 			/* what to do on failure? */
2385 		}
2386 
2387 		ifp->if_flags |= IFF_ALLMULTI;
2388 	}
2389 
2390 set_ifflags:
2391 	r = vioif_ifflags(sc);
2392 
2393 	return r;
2394 }
2395 
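/* without VIRTIO_NET_F_STATUS the link is assumed to be always up */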
2396 static bool
2397 vioif_is_link_up(struct vioif_softc *sc)
2398 {
2399 	struct virtio_softc *vsc = sc->sc_virtio;
2400 	uint16_t status;
2401 
2402 	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2403 		status = virtio_read_device_config_2(vsc,
2404 		    VIRTIO_NET_CONFIG_STATUS);
2405 	else
2406 		status = VIRTIO_NET_S_LINK_UP;
2407 
2408 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2409 }
2410 
2411 /* check the link status and update the interface state on change */
2412 static void
2413 vioif_update_link_status(struct vioif_softc *sc)
2414 {
2415 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2416 	struct vioif_txqueue *txq;
2417 	bool active, changed;
2418 	int link, i;
2419 
2420 	mutex_enter(&sc->sc_lock);
2421 
2422 	active = vioif_is_link_up(sc);
2423 	changed = false;
2424 
2425 	if (active) {
2426 		if (!sc->sc_link_active)
2427 			changed = true;
2428 
2429 		link = LINK_STATE_UP;
2430 		sc->sc_link_active = true;
2431 	} else {
2432 		if (sc->sc_link_active)
2433 			changed = true;
2434 
2435 		link = LINK_STATE_DOWN;
2436 		sc->sc_link_active = false;
2437 	}
2438 
2439 	if (changed) {
2440 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2441 			txq = &sc->sc_txq[i];
2442 
2443 			mutex_enter(txq->txq_lock);
2444 			txq->txq_link_active = sc->sc_link_active;
2445 			mutex_exit(txq->txq_lock);
2446 		}
2447 
2448 		if_link_state_change(ifp, link);
2449 	}
2450 
2451 	mutex_exit(&sc->sc_lock);
2452 }
2453 
2454 static int
2455 vioif_config_change(struct virtio_softc *vsc)
2456 {
2457 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2458 
2459 	softint_schedule(sc->sc_ctl_softint);
2460 	return 0;
2461 }
2462 
2463 static void
2464 vioif_ctl_softint(void *arg)
2465 {
2466 	struct vioif_softc *sc = arg;
2467 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2468 
2469 	vioif_update_link_status(sc);
2470 	vioif_start(ifp);
2471 }
2472 
2473 static struct workqueue *
2474 vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2475 {
2476 	struct workqueue *wq;
2477 	int error;
2478 
2479 	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2480 	    prio, ipl, flags);
2481 
2482 	if (error)
2483 		return NULL;
2484 
2485 	return wq;
2486 }
2487 
2488 static void
2489 vioif_workq_destroy(struct workqueue *wq)
2490 {
2491 
2492 	workqueue_destroy(wq);
2493 }
2494 
2495 static void
2496 vioif_workq_work(struct work *wk, void *context)
2497 {
2498 	struct vioif_work *work;
2499 
2500 	work = container_of(wk, struct vioif_work, cookie);
2501 
2502 	atomic_store_relaxed(&work->added, 0);
2503 	work->func(work->arg);
2504 }
2505 
2506 static void
2507 vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2508 {
2509 
2510 	memset(work, 0, sizeof(*work));
2511 	work->func = func;
2512 	work->arg = arg;
2513 }
2514 
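/* enqueue the work item unless it is already pending on the workqueue */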
2515 static void
2516 vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2517 {
2518 
2519 	if (atomic_load_relaxed(&work->added) != 0)
2520 		return;
2521 
2522 	atomic_store_relaxed(&work->added, 1);
2523 	kpreempt_disable();
2524 	workqueue_enqueue(wq, &work->cookie, NULL);
2525 	kpreempt_enable();
2526 }
2527 
2528 static void
2529 vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2530 {
2531 
2532 	workqueue_wait(wq, &work->cookie);
2533 }
2534 
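/*
 * Create the per-device sysctl subtree: the txrx_workqueue toggle and the
 * rx/tx interrupt and deferred processing limits.
 */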
2535 static int
2536 vioif_setup_sysctl(struct vioif_softc *sc)
2537 {
2538 	const char *devname;
2539 	struct sysctllog **log;
2540 	const struct sysctlnode *rnode, *rxnode, *txnode;
2541 	int error;
2542 
2543 	log = &sc->sc_sysctllog;
2544 	devname = device_xname(sc->sc_dev);
2545 
2546 	error = sysctl_createv(log, 0, NULL, &rnode,
2547 	    0, CTLTYPE_NODE, devname,
2548 	    SYSCTL_DESCR("virtio-net information and settings"),
2549 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2550 	if (error)
2551 		goto out;
2552 
2553 	error = sysctl_createv(log, 0, &rnode, NULL,
2554 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2555 	    SYSCTL_DESCR("Use workqueue for packet processing"),
2556 	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2557 	if (error)
2558 		goto out;
2559 
2560 	error = sysctl_createv(log, 0, &rnode, &rxnode,
2561 	    0, CTLTYPE_NODE, "rx",
2562 	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2563 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2564 	if (error)
2565 		goto out;
2566 
2567 	error = sysctl_createv(log, 0, &rxnode, NULL,
2568 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2569 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2570 	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2571 	if (error)
2572 		goto out;
2573 
2574 	error = sysctl_createv(log, 0, &rxnode, NULL,
2575 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2576 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2577 	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2578 	if (error)
2579 		goto out;
2580 
2581 	error = sysctl_createv(log, 0, &rnode, &txnode,
2582 	    0, CTLTYPE_NODE, "tx",
2583 	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2584 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2585 	if (error)
2586 		goto out;
2587 
2588 	error = sysctl_createv(log, 0, &txnode, NULL,
2589 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2590 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2591 	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2592 	if (error)
2593 		goto out;
2594 
2595 	error = sysctl_createv(log, 0, &txnode, NULL,
2596 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2597 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2598 	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2599 
2600 out:
2601 	if (error)
2602 		sysctl_teardown(log);
2603 
2604 	return error;
2605 }
2606 
2607 static void
2608 vioif_setup_stats(struct vioif_softc *sc)
2609 {
2610 	struct vioif_rxqueue *rxq;
2611 	struct vioif_txqueue *txq;
2612 	int i;
2613 
2614 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2615 		rxq = &sc->sc_rxq[i];
2616 		txq = &sc->sc_txq[i];
2617 
2618 		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2619 		    device_xname(sc->sc_dev), i);
2620 		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2621 		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2622 		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2623 		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2624 		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2625 		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2626 
2627 		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2628 		    device_xname(sc->sc_dev), i);
2629 		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2630 		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2631 	}
2632 
2633 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2634 	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2635 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2636 	    NULL, device_xname(sc->sc_dev), "control command failed");
2637 }
2638 
2639 MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2640 
2641 #ifdef _MODULE
2642 #include "ioconf.c"
2643 #endif
2644 
2645 static int
2646 if_vioif_modcmd(modcmd_t cmd, void *opaque)
2647 {
2648 	int error = 0;
2649 
2650 #ifdef _MODULE
2651 	switch (cmd) {
2652 	case MODULE_CMD_INIT:
2653 		error = config_init_component(cfdriver_ioconf_if_vioif,
2654 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2655 		break;
2656 	case MODULE_CMD_FINI:
2657 		error = config_fini_component(cfdriver_ioconf_if_vioif,
2658 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2659 		break;
2660 	default:
2661 		error = ENOTTY;
2662 		break;
2663 	}
2664 #endif
2665 
2666 	return error;
2667 }
2668