xref: /netbsd-src/sys/dev/pci/if_vioif.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: if_vioif.c,v 1.82 2022/09/12 07:26:04 knakahara Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.82 2022/09/12 07:26:04 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_net_mpsafe.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/atomic.h>
40 #include <sys/bus.h>
41 #include <sys/condvar.h>
42 #include <sys/device.h>
43 #include <sys/evcnt.h>
44 #include <sys/intr.h>
45 #include <sys/kmem.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/sockio.h>
49 #include <sys/syslog.h>
50 #include <sys/cpu.h>
51 #include <sys/module.h>
52 #include <sys/pcq.h>
53 #include <sys/workqueue.h>
54 
55 #include <dev/pci/virtioreg.h>
56 #include <dev/pci/virtiovar.h>
57 
58 #include <net/if.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_ether.h>
62 
63 #include <net/bpf.h>
64 
65 #include "ioconf.h"
66 
67 #ifdef NET_MPSAFE
68 #define VIOIF_MPSAFE	1
69 #define VIOIF_MULTIQ	1
70 #endif
71 
72 /*
73  * if_vioifreg.h:
74  */
75 /* Configuration registers */
76 #define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
77 #define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
78 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
79 #define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
80 
81 /* Feature bits */
82 #define VIRTIO_NET_F_CSUM		__BIT(0)
83 #define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
84 #define VIRTIO_NET_F_MAC		__BIT(5)
85 #define VIRTIO_NET_F_GSO		__BIT(6)
86 #define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
87 #define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
88 #define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
89 #define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
90 #define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
91 #define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
92 #define VIRTIO_NET_F_HOST_ECN		__BIT(13)
93 #define VIRTIO_NET_F_HOST_UFO		__BIT(14)
94 #define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
95 #define VIRTIO_NET_F_STATUS		__BIT(16)
96 #define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
97 #define VIRTIO_NET_F_CTRL_RX		__BIT(18)
98 #define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
99 #define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
100 #define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
101 #define VIRTIO_NET_F_MQ			__BIT(22)
102 #define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
103 
104 #define VIRTIO_NET_FLAG_BITS			\
105 	VIRTIO_COMMON_FLAG_BITS			\
106 	"b\x17" "CTRL_MAC\0"			\
107 	"b\x16" "MQ\0"				\
108 	"b\x15" "GUEST_ANNOUNCE\0"		\
109 	"b\x14" "CTRL_RX_EXTRA\0"		\
110 	"b\x13" "CTRL_VLAN\0"			\
111 	"b\x12" "CTRL_RX\0"			\
112 	"b\x11" "CTRL_VQ\0"			\
113 	"b\x10" "STATUS\0"			\
114 	"b\x0f" "MRG_RXBUF\0"			\
115 	"b\x0e" "HOST_UFO\0"			\
116 	"b\x0d" "HOST_ECN\0"			\
117 	"b\x0c" "HOST_TSO6\0"			\
118 	"b\x0b" "HOST_TSO4\0"			\
119 	"b\x0a" "GUEST_UFO\0"			\
120 	"b\x09" "GUEST_ECN\0"			\
121 	"b\x08" "GUEST_TSO6\0"			\
122 	"b\x07" "GUEST_TSO4\0"			\
123 	"b\x06" "GSO\0"				\
124 	"b\x05" "MAC\0"				\
125 	"b\x01" "GUEST_CSUM\0"			\
126 	"b\x00" "CSUM\0"
127 
128 /* Status */
129 #define VIRTIO_NET_S_LINK_UP	1
130 
131 /* Packet header structure */
132 struct virtio_net_hdr {
133 	uint8_t		flags;
134 	uint8_t		gso_type;
135 	uint16_t	hdr_len;
136 	uint16_t	gso_size;
137 	uint16_t	csum_start;
138 	uint16_t	csum_offset;
139 
140 	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
141 } __packed;
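/*
 * Every frame on the rx/tx virtqueues is preceded by one of these
 * headers; only the first sc_hdr_size bytes are used, since the
 * num_buffers field exists only when VIRTIO_NET_F_MRG_RXBUF or
 * VIRTIO_F_VERSION_1 has been negotiated (see vioif_attach()).  Each
 * slot is therefore enqueued as a header dmamap followed by a payload
 * dmamap.
 */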
142 
143 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
144 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
145 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
146 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
147 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
148 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
149 
150 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
151 
152 /* Control virtqueue */
153 struct virtio_net_ctrl_cmd {
154 	uint8_t	class;
155 	uint8_t	command;
156 } __packed;
157 #define VIRTIO_NET_CTRL_RX		0
158 # define VIRTIO_NET_CTRL_RX_PROMISC	0
159 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
160 
161 #define VIRTIO_NET_CTRL_MAC		1
162 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
163 # define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
164 
165 #define VIRTIO_NET_CTRL_VLAN		2
166 # define VIRTIO_NET_CTRL_VLAN_ADD	0
167 # define VIRTIO_NET_CTRL_VLAN_DEL	1
168 
169 #define VIRTIO_NET_CTRL_MQ			4
170 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
171 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
172 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
173 
174 struct virtio_net_ctrl_status {
175 	uint8_t	ack;
176 } __packed;
177 #define VIRTIO_NET_OK			0
178 #define VIRTIO_NET_ERR			1
179 
180 struct virtio_net_ctrl_rx {
181 	uint8_t	onoff;
182 } __packed;
183 
184 struct virtio_net_ctrl_mac_tbl {
185 	uint32_t nentries;
186 	uint8_t macs[][ETHER_ADDR_LEN];
187 } __packed;
188 
189 struct virtio_net_ctrl_mac_addr {
190 	uint8_t mac[ETHER_ADDR_LEN];
191 } __packed;
192 
193 struct virtio_net_ctrl_vlan {
194 	uint16_t id;
195 } __packed;
196 
197 struct virtio_net_ctrl_mq {
198 	uint16_t virtqueue_pairs;
199 } __packed;
200 
201 /*
202  * if_vioifvar.h:
203  */
204 
205 /*
206  * Locking notes:
207  * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
208  *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
209  *      - more than one lock cannot be held at once
210  * + ctrlq_inuse is protected by ctrlq_wait_lock.
211  *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
212  *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
213  * + fields in vioif_softc except queues are protected by
214  *   sc->sc_lock (an adaptive mutex)
215  *      - the lock is held before acquisition of other locks
216  */
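/*
 * A sketch of the ordering that follows from the notes above (not an
 * exhaustive list): sc_lock may be taken first and then at most one of
 * txq_lock, rxq_lock or ctrlq_wait_lock; none of those three may be
 * held while acquiring another of them.
 */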
217 
218 struct vioif_ctrl_cmdspec {
219 	bus_dmamap_t	dmamap;
220 	void		*buf;
221 	bus_size_t	bufsize;
222 };
223 
224 struct vioif_work {
225 	struct work	 cookie;
226 	void		(*func)(void *);
227 	void		*arg;
228 	unsigned int	 added;
229 };
230 
231 struct vioif_txqueue {
232 	kmutex_t		*txq_lock;	/* lock for tx operations */
233 
234 	struct virtqueue	*txq_vq;
235 	bool			txq_stopping;
236 	bool			txq_link_active;
237 	pcq_t			*txq_intrq;
238 
239 	struct virtio_net_hdr	*txq_hdrs;
240 	bus_dmamap_t		*txq_hdr_dmamaps;
241 
242 	struct mbuf		**txq_mbufs;
243 	bus_dmamap_t		*txq_dmamaps;
244 
245 	void			*txq_deferred_transmit;
246 	void			*txq_handle_si;
247 	struct vioif_work	 txq_work;
248 	bool			 txq_workqueue;
249 	bool			 txq_active;
250 
251 	char			 txq_evgroup[16];
252 	struct evcnt		 txq_defrag_failed;
253 	struct evcnt		 txq_mbuf_load_failed;
254 	struct evcnt		 txq_enqueue_reserve_failed;
255 };
256 
257 struct vioif_rxqueue {
258 	kmutex_t		*rxq_lock;	/* lock for rx operations */
259 
260 	struct virtqueue	*rxq_vq;
261 	bool			rxq_stopping;
262 
263 	struct virtio_net_hdr	*rxq_hdrs;
264 	bus_dmamap_t		*rxq_hdr_dmamaps;
265 
266 	struct mbuf		**rxq_mbufs;
267 	bus_dmamap_t		*rxq_dmamaps;
268 
269 	void			*rxq_handle_si;
270 	struct vioif_work	 rxq_work;
271 	bool			 rxq_workqueue;
272 	bool			 rxq_active;
273 
274 	char			 rxq_evgroup[16];
275 	struct evcnt		 rxq_mbuf_add_failed;
276 };
277 
278 struct vioif_ctrlqueue {
279 	struct virtqueue		*ctrlq_vq;
280 	enum {
281 		FREE, INUSE, DONE
282 	}				ctrlq_inuse;
283 	kcondvar_t			ctrlq_wait;
284 	kmutex_t			ctrlq_wait_lock;
285 	struct lwp			*ctrlq_owner;
286 
287 	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
288 	struct virtio_net_ctrl_status	*ctrlq_status;
289 	struct virtio_net_ctrl_rx	*ctrlq_rx;
290 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
291 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
292 	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
293 	struct virtio_net_ctrl_mq	*ctrlq_mq;
294 
295 	bus_dmamap_t			ctrlq_cmd_dmamap;
296 	bus_dmamap_t			ctrlq_status_dmamap;
297 	bus_dmamap_t			ctrlq_rx_dmamap;
298 	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
299 	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
300 	bus_dmamap_t			ctrlq_mac_addr_dmamap;
301 	bus_dmamap_t			ctrlq_mq_dmamap;
302 
303 	struct evcnt			ctrlq_cmd_load_failed;
304 	struct evcnt			ctrlq_cmd_failed;
305 };
306 
307 struct vioif_softc {
308 	device_t		sc_dev;
309 	kmutex_t		sc_lock;
310 	struct sysctllog	*sc_sysctllog;
311 
312 	struct virtio_softc	*sc_virtio;
313 	struct virtqueue	*sc_vqs;
314 	u_int			 sc_hdr_size;
315 
316 	int			sc_max_nvq_pairs;
317 	int			sc_req_nvq_pairs;
318 	int			sc_act_nvq_pairs;
319 
320 	uint8_t			sc_mac[ETHER_ADDR_LEN];
321 	struct ethercom		sc_ethercom;
322 	int			sc_link_state;
323 
324 	struct vioif_txqueue	*sc_txq;
325 	struct vioif_rxqueue	*sc_rxq;
326 
327 	bool			sc_has_ctrl;
328 	struct vioif_ctrlqueue	sc_ctrlq;
329 
330 	bus_dma_segment_t	sc_hdr_segs[1];
331 	void			*sc_dmamem;
332 	void			*sc_kmem;
333 
334 	void			*sc_ctl_softint;
335 
336 	struct workqueue	*sc_txrx_workqueue;
337 	bool			 sc_txrx_workqueue_sysctl;
338 	u_int			 sc_tx_intr_process_limit;
339 	u_int			 sc_tx_process_limit;
340 	u_int			 sc_rx_intr_process_limit;
341 	u_int			 sc_rx_process_limit;
342 };
343 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345 
346 #define VIOIF_TX_INTR_PROCESS_LIMIT	256
347 #define VIOIF_TX_PROCESS_LIMIT		256
348 #define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349 #define VIOIF_RX_PROCESS_LIMIT		256
350 
351 #define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352 #define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353 					    true : false)
354 
355 /* cfattach interface functions */
356 static int	vioif_match(device_t, cfdata_t, void *);
357 static void	vioif_attach(device_t, device_t, void *);
358 static int	vioif_finalize_teardown(device_t);
359 
360 /* ifnet interface functions */
361 static int	vioif_init(struct ifnet *);
362 static void	vioif_stop(struct ifnet *, int);
363 static void	vioif_start(struct ifnet *);
364 static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
365 static int	vioif_transmit(struct ifnet *, struct mbuf *);
366 static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
367 static int	vioif_ioctl(struct ifnet *, u_long, void *);
368 static void	vioif_watchdog(struct ifnet *);
369 static int	vioif_ifflags_cb(struct ethercom *);
370 
371 /* rx */
372 static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
373 static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
374 static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
375 		    struct vioif_rxqueue *);
376 static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
377 static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
378 		    struct vioif_rxqueue *, u_int);
379 static int	vioif_rx_intr(void *);
380 static void	vioif_rx_handle(void *);
381 static void	vioif_rx_sched_handle(struct vioif_softc *,
382 		    struct vioif_rxqueue *);
383 static void	vioif_rx_drain(struct vioif_rxqueue *);
384 
385 /* tx */
386 static int	vioif_tx_intr(void *);
387 static void	vioif_tx_handle(void *);
388 static void	vioif_tx_sched_handle(struct vioif_softc *,
389 		    struct vioif_txqueue *);
390 static void	vioif_tx_queue_clear(struct vioif_txqueue *);
391 static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
392 		    struct vioif_txqueue *, u_int);
393 static void	vioif_tx_drain(struct vioif_txqueue *);
394 static void	vioif_deferred_transmit(void *);
395 
396 /* workqueue */
397 static struct workqueue*
398 		vioif_workq_create(const char *, pri_t, int, int);
399 static void	vioif_workq_destroy(struct workqueue *);
400 static void	vioif_workq_work(struct work *, void *);
401 static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
402 static void	vioif_work_add(struct workqueue *, struct vioif_work *);
403 static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
404 
405 /* other control */
406 static int	vioif_get_link_status(struct vioif_softc *);
407 static void	vioif_update_link_status(struct vioif_softc *);
408 static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
409 static int	vioif_set_promisc(struct vioif_softc *, bool);
410 static int	vioif_set_allmulti(struct vioif_softc *, bool);
411 static int	vioif_set_rx_filter(struct vioif_softc *);
412 static int	vioif_rx_filter(struct vioif_softc *);
413 static int	vioif_set_mac_addr(struct vioif_softc *);
414 static int	vioif_ctrl_intr(void *);
415 static int	vioif_config_change(struct virtio_softc *);
416 static void	vioif_ctl_softint(void *);
417 static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
418 static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
419 static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
420 static int	vioif_setup_sysctl(struct vioif_softc *);
421 static void	vioif_setup_stats(struct vioif_softc *);
422 static int	vioif_ifflags(struct vioif_softc *);
423 
424 CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
425 		  vioif_match, vioif_attach, NULL, NULL);
426 
427 static int
428 vioif_match(device_t parent, cfdata_t match, void *aux)
429 {
430 	struct virtio_attach_args *va = aux;
431 
432 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
433 		return 1;
434 
435 	return 0;
436 }
437 
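/*
 * bus_dma(9) helpers: vioif_dmamap_create() creates a map and reports
 * failures via aprint_error_dev(), vioif_dmamap_destroy() tears one
 * down, and vioif_dmamap_create_load() additionally loads a fixed
 * buffer into the freshly created map.
 */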
438 static int
439 vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
440     bus_size_t size, int nsegs, const char *usage)
441 {
442 	int r;
443 
444 	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
445 	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
446 
447 	if (r != 0) {
448 		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
449 		    "error code %d\n", usage, r);
450 	}
451 
452 	return r;
453 }
454 
455 static void
456 vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
457 {
458 
459 	if (*map) {
460 		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
461 		*map = NULL;
462 	}
463 }
464 
465 static int
466 vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
467     void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
468 {
469 	int r;
470 
471 	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
472 	if (r != 0)
473 		return 1;
474 
475 	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
476 	    size, NULL, rw | BUS_DMA_NOWAIT);
477 	if (r != 0) {
478 		vioif_dmamap_destroy(sc, map);
479 		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
480 		    "error code %d\n", usage, r);
481 	}
482 
483 	return r;
484 }
485 
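/*
 * Simple bump allocator: return the current position *p and advance it
 * by size bytes.  vioif_alloc_mems() uses this to carve per-slot arrays
 * out of the single DMA region and the single kmem region.
 */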
486 static void *
487 vioif_assign_mem(intptr_t *p, size_t size)
488 {
489 	intptr_t rv;
490 
491 	rv = *p;
492 	*p += size;
493 
494 	return (void *)rv;
495 }
496 
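/*
 * Allocate the per-queue-pair rx/tx state and the virtqueue array: one
 * rx vq and one tx vq per queue pair, plus one control vq when the
 * device offers it.
 */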
497 static void
498 vioif_alloc_queues(struct vioif_softc *sc)
499 {
500 	int nvq_pairs = sc->sc_max_nvq_pairs;
501 	int nvqs = nvq_pairs * 2;
502 	int i;
503 
504 	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
505 
506 	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
507 	    KM_SLEEP);
508 	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
509 	    KM_SLEEP);
510 
511 	if (sc->sc_has_ctrl)
512 		nvqs++;
513 
514 	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
515 	nvqs = 0;
516 	for (i = 0; i < nvq_pairs; i++) {
517 		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
518 		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
519 	}
520 
521 	if (sc->sc_has_ctrl)
522 		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
523 }
524 
525 static void
526 vioif_free_queues(struct vioif_softc *sc)
527 {
528 	int nvq_pairs = sc->sc_max_nvq_pairs;
529 	int nvqs = nvq_pairs * 2;
530 
531 	if (sc->sc_ctrlq.ctrlq_vq)
532 		nvqs++;
533 
534 	if (sc->sc_txq) {
535 		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
536 		sc->sc_txq = NULL;
537 	}
538 
539 	if (sc->sc_rxq) {
540 		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
541 		sc->sc_rxq = NULL;
542 	}
543 
544 	if (sc->sc_vqs) {
545 		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
546 		sc->sc_vqs = NULL;
547 	}
548 }
549 
550 /* allocate memory */
551 /*
552  * dma memory is used for:
553  *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
554  *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
555  *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
556  *   ctrlq_status:	 return value for a command via ctrl vq (READ)
557  *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
558  *			 (WRITE)
559  *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
560  *			 class command (WRITE)
561  *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
562  *			 class command (WRITE)
563  * Only one instance of each ctrlq_* structure is allocated; they are
564  * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
565  */
566 /*
567  * dynamically allocated memory is used for:
568  *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
569  *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
570  *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
571  *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
572  *   rxq_mbufs[slot]:		mbuf pointer array for received frames
573  *   txq_mbufs[slot]:		mbuf pointer array for sent frames
574  */
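/*
 * Both regions are carved up with vioif_assign_mem(): the DMA region
 * holds the headers and the ctrlq_* buffers, the kmem region holds the
 * dmamap and mbuf pointer arrays.
 */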
575 static int
576 vioif_alloc_mems(struct vioif_softc *sc)
577 {
578 	struct virtio_softc *vsc = sc->sc_virtio;
579 	struct vioif_txqueue *txq;
580 	struct vioif_rxqueue *rxq;
581 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
582 	int allocsize, allocsize2, r, rsegs, i, qid;
583 	void *vaddr;
584 	intptr_t p;
585 
586 	allocsize = 0;
587 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
588 		rxq = &sc->sc_rxq[qid];
589 		txq = &sc->sc_txq[qid];
590 
591 		allocsize += sizeof(struct virtio_net_hdr) *
592 			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
593 	}
594 	if (sc->sc_has_ctrl) {
595 		allocsize += sizeof(struct virtio_net_ctrl_cmd);
596 		allocsize += sizeof(struct virtio_net_ctrl_status);
597 		allocsize += sizeof(struct virtio_net_ctrl_rx);
598 		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
599 		    + ETHER_ADDR_LEN;
600 		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
601 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
602 		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
603 		allocsize += sizeof(struct virtio_net_ctrl_mq);
604 	}
605 	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
606 	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
607 	if (r != 0) {
608 		aprint_error_dev(sc->sc_dev,
609 		    "DMA memory allocation failed, size %d, "
610 		    "error code %d\n", allocsize, r);
611 		goto err_none;
612 	}
613 	r = bus_dmamem_map(virtio_dmat(vsc),
614 	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
615 	if (r != 0) {
616 		aprint_error_dev(sc->sc_dev,
617 		    "DMA memory map failed, error code %d\n", r);
618 		goto err_dmamem_alloc;
619 	}
620 
621 	memset(vaddr, 0, allocsize);
622 	sc->sc_dmamem = vaddr;
623 	p = (intptr_t) vaddr;
624 
625 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
626 		rxq = &sc->sc_rxq[qid];
627 		txq = &sc->sc_txq[qid];
628 
629 		rxq->rxq_hdrs = vioif_assign_mem(&p,
630 		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
631 		txq->txq_hdrs = vioif_assign_mem(&p,
632 		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
633 	}
634 	if (sc->sc_has_ctrl) {
635 		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
636 		    sizeof(*ctrlq->ctrlq_cmd));
637 		ctrlq->ctrlq_status = vioif_assign_mem(&p,
638 		    sizeof(*ctrlq->ctrlq_status));
639 		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
640 		    sizeof(*ctrlq->ctrlq_rx));
641 		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
642 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
643 		    + ETHER_ADDR_LEN);
644 		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
645 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
646 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
647 		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
648 		    sizeof(*ctrlq->ctrlq_mac_addr));
649 		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
650 	}
651 
652 	allocsize2 = 0;
653 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
654 		int rxqsize, txqsize;
655 
656 		rxq = &sc->sc_rxq[qid];
657 		txq = &sc->sc_txq[qid];
658 		rxqsize = rxq->rxq_vq->vq_num;
659 		txqsize = txq->txq_vq->vq_num;
660 
661 		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
662 		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
663 		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
664 
665 		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
666 		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
667 		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
668 	}
669 	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
670 	sc->sc_kmem = vaddr;
671 	p = (intptr_t) vaddr;
672 
673 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
674 		int rxqsize, txqsize;
675 		rxq = &sc->sc_rxq[qid];
676 		txq = &sc->sc_txq[qid];
677 		rxqsize = rxq->rxq_vq->vq_num;
678 		txqsize = txq->txq_vq->vq_num;
679 
680 		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
681 		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
682 		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
683 		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
684 		rxq->rxq_dmamaps = vioif_assign_mem(&p,
685 		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
686 		txq->txq_dmamaps = vioif_assign_mem(&p,
687 		    sizeof(txq->txq_dmamaps[0]) * txqsize);
688 		rxq->rxq_mbufs = vioif_assign_mem(&p,
689 		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
690 		txq->txq_mbufs = vioif_assign_mem(&p,
691 		    sizeof(txq->txq_mbufs[0]) * txqsize);
692 	}
693 
694 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
695 		rxq = &sc->sc_rxq[qid];
696 		txq = &sc->sc_txq[qid];
697 
698 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
699 			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
700 			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
701 			    BUS_DMA_READ, "rx header");
702 			if (r != 0)
703 				goto err_reqs;
704 
705 			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
706 			    MCLBYTES, 1, "rx payload");
707 			if (r != 0)
708 				goto err_reqs;
709 		}
710 
711 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
712 			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
713 			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
714 			    BUS_DMA_READ, "tx header");
715 			if (r != 0)
716 				goto err_reqs;
717 
718 			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
719 			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
720 			if (r != 0)
721 				goto err_reqs;
722 		}
723 	}
724 
725 	if (sc->sc_has_ctrl) {
726 		/* control vq class & command */
727 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
728 		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
729 		    BUS_DMA_WRITE, "control command");
730 		if (r != 0)
731 			goto err_reqs;
732 
733 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
734 		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
735 		    BUS_DMA_READ, "control status");
736 		if (r != 0)
737 			goto err_reqs;
738 
739 		/* control vq rx mode command parameter */
740 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
741 		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
742 		    BUS_DMA_WRITE, "rx mode control command");
743 		if (r != 0)
744 			goto err_reqs;
745 
746 		/* multiqueue set command */
747 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
748 		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
749 		    BUS_DMA_WRITE, "multiqueue set command");
750 		if (r != 0)
751 			goto err_reqs;
752 
753 		/* control vq MAC filter table for unicast */
754 		/* do not load now since its length is variable */
755 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
756 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
757 		    + ETHER_ADDR_LEN, 1,
758 		    "unicast MAC address filter command");
759 		if (r != 0)
760 			goto err_reqs;
761 
762 		/* control vq MAC filter table for multicast */
763 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
764 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
765 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
766 		    "multicast MAC address filter command");
767 		if (r != 0)
768 			goto err_reqs;
769 
770 		/* control vq MAC address set command */
771 		r = vioif_dmamap_create_load(sc,
772 		    &ctrlq->ctrlq_mac_addr_dmamap,
773 		    ctrlq->ctrlq_mac_addr,
774 		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
775 		    BUS_DMA_WRITE, "mac addr set command");
776 		if (r != 0)
777 			goto err_reqs;
778 	}
779 
780 	return 0;
781 
782 err_reqs:
783 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
784 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
785 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
786 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
787 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
788 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
789 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
790 		rxq = &sc->sc_rxq[qid];
791 		txq = &sc->sc_txq[qid];
792 
793 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
794 			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
795 			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
796 		}
797 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
798 			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
799 			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
800 		}
801 	}
802 	if (sc->sc_kmem) {
803 		kmem_free(sc->sc_kmem, allocsize2);
804 		sc->sc_kmem = NULL;
805 	}
806 	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
807 err_dmamem_alloc:
808 	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
809 err_none:
810 	return -1;
811 }
812 
813 static void
814 vioif_attach(device_t parent, device_t self, void *aux)
815 {
816 	struct vioif_softc *sc = device_private(self);
817 	struct virtio_softc *vsc = device_private(parent);
818 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
819 	struct vioif_txqueue *txq;
820 	struct vioif_rxqueue *rxq;
821 	uint64_t features, req_features;
822 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
823 	u_int softint_flags;
824 	int r, i, nvqs = 0, req_flags;
825 	char xnamebuf[MAXCOMLEN];
826 
827 	if (virtio_child(vsc) != NULL) {
828 		aprint_normal(": child already attached for %s; "
829 		    "something wrong...\n", device_xname(parent));
830 		return;
831 	}
832 
833 	sc->sc_dev = self;
834 	sc->sc_virtio = vsc;
835 	sc->sc_link_state = LINK_STATE_UNKNOWN;
836 
837 	sc->sc_max_nvq_pairs = 1;
838 	sc->sc_req_nvq_pairs = 1;
839 	sc->sc_act_nvq_pairs = 1;
840 	sc->sc_txrx_workqueue_sysctl = true;
841 	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
842 	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
843 	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
844 	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
845 
846 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
847 
848 	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
849 	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
850 	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
851 	if (sc->sc_txrx_workqueue == NULL)
852 		goto err;
853 
854 	req_flags = 0;
855 
856 #ifdef VIOIF_MPSAFE
857 	req_flags |= VIRTIO_F_INTR_MPSAFE;
858 #endif
859 	req_flags |= VIRTIO_F_INTR_MSIX;
860 
861 	req_features =
862 	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
863 	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
864 	req_features |= VIRTIO_F_RING_EVENT_IDX;
865 	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
866 #ifdef VIOIF_MULTIQ
867 	req_features |= VIRTIO_NET_F_MQ;
868 #endif
869 	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
870 	    vioif_config_change, virtio_vq_intrhand, req_flags,
871 	    req_features, VIRTIO_NET_FLAG_BITS);
872 
873 	features = virtio_features(vsc);
874 	if (features == 0)
875 		goto err;
876 
877 	if (features & VIRTIO_NET_F_MAC) {
878 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
879 			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
880 			    VIRTIO_NET_CONFIG_MAC + i);
881 		}
882 	} else {
883 		/* code stolen from sys/net/if_tap.c */
884 		struct timeval tv;
885 		uint32_t ui;
886 		getmicrouptime(&tv);
887 		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
888 		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
889 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
890 			virtio_write_device_config_1(vsc,
891 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
892 		}
893 	}
894 
895 	/* 'Ethernet' with a capital letter follows other ethernet drivers' attachment output */
896 	aprint_normal_dev(self, "Ethernet address %s\n",
897 	    ether_sprintf(sc->sc_mac));
898 
899 	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
900 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
901 	} else {
902 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
903 	}
904 
905 	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
906 	    (features & VIRTIO_NET_F_CTRL_RX)) {
907 		sc->sc_has_ctrl = true;
908 
909 		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
910 		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
911 		ctrlq->ctrlq_inuse = FREE;
912 	} else {
913 		sc->sc_has_ctrl = false;
914 	}
915 
916 	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
917 		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
918 		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
919 
920 		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
921 			goto err;
922 
923 		/* Limit the number of queue pairs to use */
924 		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
925 	}
926 
927 	vioif_alloc_queues(sc);
928 	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
929 
930 #ifdef VIOIF_MPSAFE
931 	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
932 #else
933 	softint_flags = SOFTINT_NET;
934 #endif
935 
936 	/*
937 	 * Allocating virtqueues
938 	 */
939 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
940 		rxq = &sc->sc_rxq[i];
941 		txq = &sc->sc_txq[i];
942 		char qname[32];
943 
944 		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
945 
946 		rxq->rxq_handle_si = softint_establish(softint_flags,
947 		    vioif_rx_handle, rxq);
948 		if (rxq->rxq_handle_si == NULL) {
949 			aprint_error_dev(self, "cannot establish rx softint\n");
950 			goto err;
951 		}
952 
953 		snprintf(qname, sizeof(qname), "rx%d", i);
954 		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
955 		    MCLBYTES + sc->sc_hdr_size, 2, qname);
956 		if (r != 0)
957 			goto err;
958 		nvqs++;
959 		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
960 		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
961 		rxq->rxq_stopping = true;
962 		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
963 
964 		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
965 
966 		txq->txq_deferred_transmit = softint_establish(softint_flags,
967 		    vioif_deferred_transmit, txq);
968 		if (txq->txq_deferred_transmit == NULL) {
969 			aprint_error_dev(self, "cannot establish tx softint\n");
970 			goto err;
971 		}
972 		txq->txq_handle_si = softint_establish(softint_flags,
973 		    vioif_tx_handle, txq);
974 		if (txq->txq_handle_si == NULL) {
975 			aprint_error_dev(self, "cannot establish tx softint\n");
976 			goto err;
977 		}
978 
979 		snprintf(qname, sizeof(qname), "tx%d", i);
980 		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
981 		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
982 		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
983 		if (r != 0)
984 			goto err;
985 		nvqs++;
986 		txq->txq_vq->vq_intrhand = vioif_tx_intr;
987 		txq->txq_vq->vq_intrhand_arg = (void *)txq;
988 		txq->txq_link_active = VIOIF_IS_LINK_ACTIVE(sc);
989 		txq->txq_stopping = false;
990 		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
991 		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
992 	}
993 
994 	if (sc->sc_has_ctrl) {
995 		/*
996 		 * Allocating a virtqueue for control channel
997 		 */
998 		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
999 		    NBPG, 1, "control");
1000 		if (r != 0) {
1001 			aprint_error_dev(self, "failed to allocate "
1002 			    "a virtqueue for control channel, error code %d\n",
1003 			    r);
1004 
1005 			sc->sc_has_ctrl = false;
1006 			cv_destroy(&ctrlq->ctrlq_wait);
1007 			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1008 		} else {
1009 			nvqs++;
1010 			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1011 			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1012 		}
1013 	}
1014 
1015 	sc->sc_ctl_softint = softint_establish(softint_flags,
1016 	    vioif_ctl_softint, sc);
1017 	if (sc->sc_ctl_softint == NULL) {
1018 		aprint_error_dev(self, "cannot establish ctl softint\n");
1019 		goto err;
1020 	}
1021 
1022 	if (vioif_alloc_mems(sc) < 0)
1023 		goto err;
1024 
1025 	if (virtio_child_attach_finish(vsc) != 0)
1026 		goto err;
1027 
1028 	if (vioif_setup_sysctl(sc) != 0) {
1029 		aprint_error_dev(self, "unable to create sysctl node\n");
1030 		/* continue */
1031 	}
1032 
1033 	vioif_setup_stats(sc);
1034 
1035 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1036 	ifp->if_softc = sc;
1037 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1038 #ifdef VIOIF_MPSAFE
1039 	ifp->if_extflags = IFEF_MPSAFE;
1040 #endif
1041 	ifp->if_start = vioif_start;
1042 	if (sc->sc_req_nvq_pairs > 1)
1043 		ifp->if_transmit = vioif_transmit;
1044 	ifp->if_ioctl = vioif_ioctl;
1045 	ifp->if_init = vioif_init;
1046 	ifp->if_stop = vioif_stop;
1047 	ifp->if_capabilities = 0;
1048 	ifp->if_watchdog = vioif_watchdog;
1049 	txq = &sc->sc_txq[0];
1050 	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1051 	IFQ_SET_READY(&ifp->if_snd);
1052 
1053 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1054 
1055 	if_attach(ifp);
1056 	if_deferred_start_init(ifp, NULL);
1057 	ether_ifattach(ifp, sc->sc_mac);
1058 	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1059 
1060 	return;
1061 
1062 err:
1063 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1064 		rxq = &sc->sc_rxq[i];
1065 		txq = &sc->sc_txq[i];
1066 
1067 		if (rxq->rxq_lock) {
1068 			mutex_obj_free(rxq->rxq_lock);
1069 			rxq->rxq_lock = NULL;
1070 		}
1071 
1072 		if (rxq->rxq_handle_si) {
1073 			softint_disestablish(rxq->rxq_handle_si);
1074 			rxq->rxq_handle_si = NULL;
1075 		}
1076 
1077 		if (txq->txq_lock) {
1078 			mutex_obj_free(txq->txq_lock);
1079 			txq->txq_lock = NULL;
1080 		}
1081 
1082 		if (txq->txq_handle_si) {
1083 			softint_disestablish(txq->txq_handle_si);
1084 			txq->txq_handle_si = NULL;
1085 		}
1086 
1087 		if (txq->txq_deferred_transmit) {
1088 			softint_disestablish(txq->txq_deferred_transmit);
1089 			txq->txq_deferred_transmit = NULL;
1090 		}
1091 
1092 		if (txq->txq_intrq) {
1093 			pcq_destroy(txq->txq_intrq);
1094 			txq->txq_intrq = NULL;
1095 		}
1096 	}
1097 
1098 	if (sc->sc_has_ctrl) {
1099 		cv_destroy(&ctrlq->ctrlq_wait);
1100 		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1101 	}
1102 
1103 	while (nvqs > 0)
1104 		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1105 
1106 	vioif_free_queues(sc);
1107 	mutex_destroy(&sc->sc_lock);
1108 	virtio_child_attach_failed(vsc);
1109 	config_finalize_register(self, vioif_finalize_teardown);
1110 
1111 	return;
1112 }
1113 
1114 static int
1115 vioif_finalize_teardown(device_t self)
1116 {
1117 	struct vioif_softc *sc = device_private(self);
1118 
1119 	if (sc->sc_txrx_workqueue != NULL) {
1120 		vioif_workq_destroy(sc->sc_txrx_workqueue);
1121 		sc->sc_txrx_workqueue = NULL;
1122 	}
1123 
1124 	return 0;
1125 }
1126 
1127 static void
1128 vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1129 {
1130 	struct virtio_softc *vsc = sc->sc_virtio;
1131 	struct vioif_txqueue *txq;
1132 	struct vioif_rxqueue *rxq;
1133 	int i;
1134 
1135 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1136 		txq = &sc->sc_txq[i];
1137 		rxq = &sc->sc_rxq[i];
1138 
1139 		virtio_start_vq_intr(vsc, txq->txq_vq);
1140 		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1141 	}
1142 }
1143 
1144 static void
1145 vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1146 {
1147 	struct virtio_softc *vsc = sc->sc_virtio;
1148 	struct vioif_txqueue *txq;
1149 	struct vioif_rxqueue *rxq;
1150 	int i;
1151 
1152 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1153 		rxq = &sc->sc_rxq[i];
1154 		txq = &sc->sc_txq[i];
1155 
1156 		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1157 		virtio_stop_vq_intr(vsc, txq->txq_vq);
1158 	}
1159 }
1160 
1161 /*
1162  * Interface functions for ifnet
1163  */
1164 static int
1165 vioif_init(struct ifnet *ifp)
1166 {
1167 	struct vioif_softc *sc = ifp->if_softc;
1168 	struct virtio_softc *vsc = sc->sc_virtio;
1169 	struct vioif_rxqueue *rxq;
1170 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1171 	int r, i;
1172 
1173 	vioif_stop(ifp, 0);
1174 
1175 	r = virtio_reinit_start(vsc);
1176 	if (r != 0) {
1177 		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1178 		return EIO;
1179 	}
1180 
1181 	virtio_negotiate_features(vsc, virtio_features(vsc));
1182 
1183 	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1184 		rxq = &sc->sc_rxq[i];
1185 
1186 		/* Have to set false before vioif_populate_rx_mbufs */
1187 		mutex_enter(rxq->rxq_lock);
1188 		rxq->rxq_stopping = false;
1189 		vioif_populate_rx_mbufs_locked(sc, rxq);
1190 		mutex_exit(rxq->rxq_lock);
1191 
1192 	}
1193 
1194 	virtio_reinit_end(vsc);
1195 
1196 	if (sc->sc_has_ctrl)
1197 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1198 
1199 	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1200 	if (r == 0)
1201 		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1202 	else
1203 		sc->sc_act_nvq_pairs = 1;
1204 
1205 	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1206 		sc->sc_txq[i].txq_stopping = false;
1207 
1208 	vioif_enable_interrupt_vqpairs(sc);
1209 
1210 	vioif_update_link_status(sc);
1211 	ifp->if_flags |= IFF_RUNNING;
1212 	ifp->if_flags &= ~IFF_OACTIVE;
1213 	r = vioif_rx_filter(sc);
1214 
1215 	return r;
1216 }
1217 
1218 static void
1219 vioif_stop(struct ifnet *ifp, int disable)
1220 {
1221 	struct vioif_softc *sc = ifp->if_softc;
1222 	struct virtio_softc *vsc = sc->sc_virtio;
1223 	struct vioif_txqueue *txq;
1224 	struct vioif_rxqueue *rxq;
1225 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1226 	int i;
1227 
1228 	/* disable interrupts */
1229 	vioif_disable_interrupt_vqpairs(sc);
1230 	if (sc->sc_has_ctrl)
1231 		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1232 
1233 	/*
1234 	 * stop all packet processing:
1235 	 * 1. stop interrupt handlers by rxq_stopping and txq_stopping
1236 	 * 2. wait for the packet processing workqueue to finish
1237 	 */
1238 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1239 		txq = &sc->sc_txq[i];
1240 		rxq = &sc->sc_rxq[i];
1241 
1242 		mutex_enter(rxq->rxq_lock);
1243 		rxq->rxq_stopping = true;
1244 		mutex_exit(rxq->rxq_lock);
1245 		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1246 
1247 		mutex_enter(txq->txq_lock);
1248 		txq->txq_stopping = true;
1249 		mutex_exit(txq->txq_lock);
1250 		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1251 	}
1252 
1253 	/* only way to stop I/O and DMA is resetting... */
1254 	virtio_reset(vsc);
1255 
1256 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1257 		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1258 		vioif_tx_queue_clear(&sc->sc_txq[i]);
1259 	}
1260 
1261 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1262 
1263 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1264 		txq = &sc->sc_txq[i];
1265 		rxq = &sc->sc_rxq[i];
1266 
1267 		if (disable)
1268 			vioif_rx_drain(rxq);
1269 
1270 		vioif_tx_drain(txq);
1271 	}
1272 }
1273 
1274 static void
1275 vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1276     bool is_transmit)
1277 {
1278 	struct vioif_softc *sc = ifp->if_softc;
1279 	struct virtio_softc *vsc = sc->sc_virtio;
1280 	struct virtqueue *vq = txq->txq_vq;
1281 	struct virtio_net_hdr *hdr;
1282 	struct mbuf *m;
1283 	int queued = 0;
1284 
1285 	KASSERT(mutex_owned(txq->txq_lock));
1286 
1287 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1288 		return;
1289 
1290 	if (!txq->txq_link_active || txq->txq_stopping)
1291 		return;
1292 
1293 	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1294 		return;
1295 
1296 	for (;;) {
1297 		int slot, r;
1298 
1299 		if (is_transmit)
1300 			m = pcq_get(txq->txq_intrq);
1301 		else
1302 			IFQ_DEQUEUE(&ifp->if_snd, m);
1303 
1304 		if (m == NULL)
1305 			break;
1306 
1307 		r = virtio_enqueue_prep(vsc, vq, &slot);
1308 		if (r == EAGAIN) {
1309 			ifp->if_flags |= IFF_OACTIVE;
1310 			m_freem(m);
1311 			break;
1312 		}
1313 		if (r != 0)
1314 			panic("enqueue_prep for a tx buffer");
1315 
1316 		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1317 		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1318 		if (r != 0) {
1319 			/* maybe just too fragmented */
1320 			struct mbuf *newm;
1321 
1322 			newm = m_defrag(m, M_NOWAIT);
1323 			if (newm == NULL) {
1324 				txq->txq_defrag_failed.ev_count++;
1325 				goto skip;
1326 			}
1327 
1328 			m = newm;
1329 			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1330 			    txq->txq_dmamaps[slot], m,
1331 			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1332 			if (r != 0) {
1333 				txq->txq_mbuf_load_failed.ev_count++;
1334 skip:
1335 				m_freem(m);
1336 				virtio_enqueue_abort(vsc, vq, slot);
1337 				continue;
1338 			}
1339 		}
1340 
1341 		/* This should actually never fail */
1342 		r = virtio_enqueue_reserve(vsc, vq, slot,
1343 		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1344 		if (r != 0) {
1345 			txq->txq_enqueue_reserve_failed.ev_count++;
1346 			bus_dmamap_unload(virtio_dmat(vsc),
1347 			     txq->txq_dmamaps[slot]);
1348 			/* slot already freed by virtio_enqueue_reserve */
1349 			m_freem(m);
1350 			continue;
1351 		}
1352 
1353 		txq->txq_mbufs[slot] = m;
1354 
1355 		hdr = &txq->txq_hdrs[slot];
1356 		memset(hdr, 0, sc->sc_hdr_size);
1357 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1358 		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1359 		    BUS_DMASYNC_PREWRITE);
1360 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1361 		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1362 		    BUS_DMASYNC_PREWRITE);
1363 		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1364 		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1365 		virtio_enqueue_commit(vsc, vq, slot, false);
1366 
1367 		queued++;
1368 		bpf_mtap(ifp, m, BPF_D_OUT);
1369 	}
1370 
1371 	if (queued > 0) {
1372 		virtio_enqueue_commit(vsc, vq, -1, true);
1373 		ifp->if_timer = 5;
1374 	}
1375 }
1376 
1377 static void
1378 vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1379 {
1380 
1381 	/*
1382 	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1383 	 */
1384 	vioif_send_common_locked(ifp, txq, false);
1385 
1386 }
1387 
1388 static void
1389 vioif_start(struct ifnet *ifp)
1390 {
1391 	struct vioif_softc *sc = ifp->if_softc;
1392 	struct vioif_txqueue *txq = &sc->sc_txq[0];
1393 
1394 #ifdef VIOIF_MPSAFE
1395 	KASSERT(if_is_mpsafe(ifp));
1396 #endif
1397 
1398 	mutex_enter(txq->txq_lock);
1399 	vioif_start_locked(ifp, txq);
1400 	mutex_exit(txq->txq_lock);
1401 }
1402 
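/*
 * Pick a tx queue for an outgoing packet: simply the index of the
 * current CPU modulo the number of active queue pairs.
 */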
1403 static inline int
1404 vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1405 {
1406 	struct vioif_softc *sc = ifp->if_softc;
1407 	u_int cpuid = cpu_index(curcpu());
1408 
1409 	return cpuid % sc->sc_act_nvq_pairs;
1410 }
1411 
1412 static void
1413 vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1414 {
1415 
1416 	vioif_send_common_locked(ifp, txq, true);
1417 }
1418 
1419 static int
1420 vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1421 {
1422 	struct vioif_softc *sc = ifp->if_softc;
1423 	struct vioif_txqueue *txq;
1424 	int qid;
1425 
1426 	qid = vioif_select_txqueue(ifp, m);
1427 	txq = &sc->sc_txq[qid];
1428 
1429 	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1430 		m_freem(m);
1431 		return ENOBUFS;
1432 	}
1433 
1434 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1435 	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1436 	if (m->m_flags & M_MCAST)
1437 		if_statinc_ref(nsr, if_omcasts);
1438 	IF_STAT_PUTREF(ifp);
1439 
1440 	if (mutex_tryenter(txq->txq_lock)) {
1441 		vioif_transmit_locked(ifp, txq);
1442 		mutex_exit(txq->txq_lock);
1443 	}
1444 
1445 	return 0;
1446 }
1447 
1448 static void
1449 vioif_deferred_transmit(void *arg)
1450 {
1451 	struct vioif_txqueue *txq = arg;
1452 	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1453 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1454 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1455 
1456 	mutex_enter(txq->txq_lock);
1457 	vioif_send_common_locked(ifp, txq, true);
1458 	mutex_exit(txq->txq_lock);
1459 }
1460 
1461 static int
1462 vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1463 {
1464 	int s, r;
1465 
1466 	s = splnet();
1467 
1468 	r = ether_ioctl(ifp, cmd, data);
1469 	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1470 		if (ifp->if_flags & IFF_RUNNING) {
1471 			r = vioif_rx_filter(ifp->if_softc);
1472 		} else {
1473 			r = 0;
1474 		}
1475 	}
1476 
1477 	splx(s);
1478 
1479 	return r;
1480 }
1481 
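/*
 * if_watchdog hook: while the interface is running, reclaim any
 * completed tx slots, since tx completion interrupts may have been
 * postponed (see vioif_tx_handle_locked() below).
 */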
1482 void
1483 vioif_watchdog(struct ifnet *ifp)
1484 {
1485 	struct vioif_softc *sc = ifp->if_softc;
1486 	int i;
1487 
1488 	if (ifp->if_flags & IFF_RUNNING) {
1489 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1490 			vioif_tx_queue_clear(&sc->sc_txq[i]);
1491 		}
1492 	}
1493 }
1494 
1495 /*
1496  * Receive implementation
1497  */
1498 /* allocate and initialize a mbuf for receive */
1499 static int
1500 vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1501 {
1502 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1503 	struct mbuf *m;
1504 	int r;
1505 
1506 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1507 	if (m == NULL)
1508 		return ENOBUFS;
1509 	MCLGET(m, M_DONTWAIT);
1510 	if ((m->m_flags & M_EXT) == 0) {
1511 		m_freem(m);
1512 		return ENOBUFS;
1513 	}
1514 	rxq->rxq_mbufs[i] = m;
1515 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1516 	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1517 	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1518 	if (r) {
1519 		m_freem(m);
1520 		rxq->rxq_mbufs[i] = NULL;
1521 		return r;
1522 	}
1523 
1524 	return 0;
1525 }
1526 
1527 /* free a mbuf for receive */
1528 static void
1529 vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1530 {
1531 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1532 
1533 	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1534 	m_freem(rxq->rxq_mbufs[i]);
1535 	rxq->rxq_mbufs[i] = NULL;
1536 }
1537 
1538 /* add mbufs for all the empty receive slots */
1539 static void
1540 vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1541 {
1542 	struct virtqueue *vq = rxq->rxq_vq;
1543 	struct virtio_softc *vsc = vq->vq_owner;
1544 	int i, r, ndone = 0;
1545 
1546 	KASSERT(mutex_owned(rxq->rxq_lock));
1547 
1548 	if (rxq->rxq_stopping)
1549 		return;
1550 
1551 	for (i = 0; i < vq->vq_num; i++) {
1552 		int slot;
1553 		r = virtio_enqueue_prep(vsc, vq, &slot);
1554 		if (r == EAGAIN)
1555 			break;
1556 		if (r != 0)
1557 			panic("enqueue_prep for rx buffers");
1558 		if (rxq->rxq_mbufs[slot] == NULL) {
1559 			r = vioif_add_rx_mbuf(rxq, slot);
1560 			if (r != 0) {
1561 				rxq->rxq_mbuf_add_failed.ev_count++;
1562 				break;
1563 			}
1564 		}
1565 		r = virtio_enqueue_reserve(vsc, vq, slot,
1566 		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1567 		if (r != 0) {
1568 			vioif_free_rx_mbuf(rxq, slot);
1569 			break;
1570 		}
1571 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1572 		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1573 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1574 		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1575 		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1576 		    false);
1577 		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1578 		virtio_enqueue_commit(vsc, vq, slot, false);
1579 		ndone++;
1580 	}
1581 	if (ndone > 0)
1582 		virtio_enqueue_commit(vsc, vq, -1, true);
1583 }
1584 
1585 static void
1586 vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1587 {
1588 	struct virtqueue *vq = rxq->rxq_vq;
1589 	struct virtio_softc *vsc = vq->vq_owner;
1590 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1591 	u_int limit = UINT_MAX;
1592 	bool more;
1593 
1594 	KASSERT(rxq->rxq_stopping);
1595 
1596 	mutex_enter(rxq->rxq_lock);
1597 	for (;;) {
1598 		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1599 		if (more == false)
1600 			break;
1601 	}
1602 	mutex_exit(rxq->rxq_lock);
1603 }
1604 
1605 /* dequeue received packets */
1606 static bool
1607 vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1608     struct vioif_rxqueue *rxq, u_int limit)
1609 {
1610 	struct virtqueue *vq = rxq->rxq_vq;
1611 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1612 	struct mbuf *m;
1613 	int slot, len;
1614 	bool more = false, dequeued = false;
1615 
1616 	KASSERT(mutex_owned(rxq->rxq_lock));
1617 
1618 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1619 		return false;
1620 
1621 	for (;;) {
1622 		if (limit-- == 0) {
1623 			more = true;
1624 			break;
1625 		}
1626 
1627 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1628 			break;
1629 
1630 		dequeued = true;
1631 
1632 		len -= sc->sc_hdr_size;
1633 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1634 		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1635 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1636 		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1637 		m = rxq->rxq_mbufs[slot];
1638 		KASSERT(m != NULL);
1639 		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1640 		rxq->rxq_mbufs[slot] = NULL;
1641 		virtio_dequeue_commit(vsc, vq, slot);
1642 		m_set_rcvif(m, ifp);
1643 		m->m_len = m->m_pkthdr.len = len;
1644 
1645 		mutex_exit(rxq->rxq_lock);
1646 		if_percpuq_enqueue(ifp->if_percpuq, m);
1647 		mutex_enter(rxq->rxq_lock);
1648 
1649 		if (rxq->rxq_stopping)
1650 			break;
1651 	}
1652 
1653 	if (dequeued)
1654 		vioif_populate_rx_mbufs_locked(sc, rxq);
1655 
1656 	return more;
1657 }
1658 
1659 /* rx interrupt; call _dequeue above and schedule a softint */
1660 
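/*
 * Dequeue up to "limit" received packets with rxq_lock held.  If
 * packets remain after the limit, or new ones arrive while the vq
 * interrupt is being re-enabled, processing is rescheduled through
 * vioif_rx_sched_handle() (softint or per-cpu workqueue).
 */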
1661 static void
1662 vioif_rx_handle_locked(void *xrxq, u_int limit)
1663 {
1664 	struct vioif_rxqueue *rxq = xrxq;
1665 	struct virtqueue *vq = rxq->rxq_vq;
1666 	struct virtio_softc *vsc = vq->vq_owner;
1667 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1668 	bool more;
1669 
1670 	KASSERT(!rxq->rxq_stopping);
1671 
1672 	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1673 	if (more) {
1674 		vioif_rx_sched_handle(sc, rxq);
1675 		return;
1676 	}
1677 	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1678 	if (more) {
1679 		vioif_rx_sched_handle(sc, rxq);
1680 		return;
1681 	}
1682 	atomic_store_relaxed(&rxq->rxq_active, false);
1683 }
1684 
1685 static int
1686 vioif_rx_intr(void *arg)
1687 {
1688 	struct vioif_rxqueue *rxq = arg;
1689 	struct virtqueue *vq = rxq->rxq_vq;
1690 	struct virtio_softc *vsc = vq->vq_owner;
1691 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1692 	u_int limit;
1693 
1694 	limit = sc->sc_rx_intr_process_limit;
1695 
1696 	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1697 		return 1;
1698 
1699 	mutex_enter(rxq->rxq_lock);
1700 
1701 	if (!rxq->rxq_stopping) {
1702 		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1703 
1704 		virtio_stop_vq_intr(vsc, vq);
1705 		atomic_store_relaxed(&rxq->rxq_active, true);
1706 
1707 		vioif_rx_handle_locked(rxq, limit);
1708 	}
1709 
1710 	mutex_exit(rxq->rxq_lock);
1711 	return 1;
1712 }
1713 
1714 static void
1715 vioif_rx_handle(void *xrxq)
1716 {
1717 	struct vioif_rxqueue *rxq = xrxq;
1718 	struct virtqueue *vq = rxq->rxq_vq;
1719 	struct virtio_softc *vsc = vq->vq_owner;
1720 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1721 	u_int limit;
1722 
1723 	limit = sc->sc_rx_process_limit;
1724 
1725 	mutex_enter(rxq->rxq_lock);
1726 
1727 	if (!rxq->rxq_stopping)
1728 		vioif_rx_handle_locked(rxq, limit);
1729 
1730 	mutex_exit(rxq->rxq_lock);
1731 }
1732 
1733 static void
1734 vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1735 {
1736 
1737 	KASSERT(mutex_owned(rxq->rxq_lock));
1738 
1739 	if (rxq->rxq_stopping)
1740 		return;
1741 
1742 	if (rxq->rxq_workqueue)
1743 		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1744 	else
1745 		softint_schedule(rxq->rxq_handle_si);
1746 }
1747 
1748 /* free all the mbufs; called from if_stop(disable) */
1749 static void
1750 vioif_rx_drain(struct vioif_rxqueue *rxq)
1751 {
1752 	struct virtqueue *vq = rxq->rxq_vq;
1753 	int i;
1754 
1755 	for (i = 0; i < vq->vq_num; i++) {
1756 		if (rxq->rxq_mbufs[i] == NULL)
1757 			continue;
1758 		vioif_free_rx_mbuf(rxq, i);
1759 	}
1760 }
1761 
1762 /*
1763  * Transmission implementation
1764  */
1765 /* actual transmission is done in if_start */
1766 /* tx interrupt; dequeue and free mbufs */
1767 /*
1768  * the tx interrupt is normally disabled; this should be called when
1769  * the tx vq becomes full and from the watchdog
1770  */
1771 
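/*
 * Reclaim up to "limit" transmitted slots with txq_lock held.  If slots
 * remain, processing is rescheduled through vioif_tx_sched_handle();
 * otherwise the vq interrupt is re-enabled (or, with
 * VIRTIO_F_RING_EVENT_IDX, merely postponed) and a deferred transmit
 * softint is kicked off.
 */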
1772 static void
1773 vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1774 {
1775 	struct virtqueue *vq = txq->txq_vq;
1776 	struct virtio_softc *vsc = vq->vq_owner;
1777 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1778 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1779 	bool more;
1780 
1781 	KASSERT(!txq->txq_stopping);
1782 
1783 	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1784 	if (more) {
1785 		vioif_tx_sched_handle(sc, txq);
1786 		return;
1787 	}
1788 
1789 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1790 		more = virtio_postpone_intr_smart(vsc, vq);
1791 	else
1792 		more = virtio_start_vq_intr(vsc, vq);
1793 	if (more) {
1794 		vioif_tx_sched_handle(sc, txq);
1795 		return;
1796 	}
1797 
1798 	atomic_store_relaxed(&txq->txq_active, false);
1799 	/* for ALTQ */
1800 	if (txq == &sc->sc_txq[0]) {
1801 		if_schedule_deferred_start(ifp);
1802 		ifp->if_flags &= ~IFF_OACTIVE;
1803 	}
1804 	softint_schedule(txq->txq_deferred_transmit);
1805 }
1806 
1807 
1808 static int
1809 vioif_tx_intr(void *arg)
1810 {
1811 	struct vioif_txqueue *txq = arg;
1812 	struct virtqueue *vq = txq->txq_vq;
1813 	struct virtio_softc *vsc = vq->vq_owner;
1814 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1815 	u_int limit;
1816 
1817 	limit = sc->sc_tx_intr_process_limit;
1818 
1819 	if (atomic_load_relaxed(&txq->txq_active) == true)
1820 		return 1;
1821 
1822 	mutex_enter(txq->txq_lock);
1823 
1824 	if (!txq->txq_stopping) {
1825 		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1826 
1827 		virtio_stop_vq_intr(vsc, vq);
1828 		atomic_store_relaxed(&txq->txq_active, true);
1829 
1830 		vioif_tx_handle_locked(txq, limit);
1831 	}
1832 
1833 	mutex_exit(txq->txq_lock);
1834 
1835 	return 1;
1836 }
1837 
1838 static void
1839 vioif_tx_handle(void *xtxq)
1840 {
1841 	struct vioif_txqueue *txq = xtxq;
1842 	struct virtqueue *vq = txq->txq_vq;
1843 	struct virtio_softc *vsc = vq->vq_owner;
1844 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1845 	u_int limit;
1846 
1847 	limit = sc->sc_tx_process_limit;
1848 
1849 	mutex_enter(txq->txq_lock);
1850 	if (!txq->txq_stopping)
1851 		vioif_tx_handle_locked(txq, limit);
1852 	mutex_exit(txq->txq_lock);
1853 }
1854 
1855 static void
1856 vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1857 {
1858 
1859 	KASSERT(mutex_owned(txq->txq_lock));
1860 
1861 	if (txq->txq_stopping)
1862 		return;
1863 
1864 	if (txq->txq_workqueue)
1865 		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1866 	else
1867 		softint_schedule(txq->txq_handle_si);
1868 }
1869 
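/* reclaim every completed tx slot, ignoring the processing limit */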
1870 static void
1871 vioif_tx_queue_clear(struct vioif_txqueue *txq)
1872 {
1873 	struct virtqueue *vq = txq->txq_vq;
1874 	struct virtio_softc *vsc = vq->vq_owner;
1875 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1876 	u_int limit = UINT_MAX;
1877 	bool more;
1878 
1879 	mutex_enter(txq->txq_lock);
1880 	for (;;) {
1881 		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1882 		if (more == false)
1883 			break;
1884 	}
1885 	mutex_exit(txq->txq_lock);
1886 }
1887 
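/*
 * dequeue up to "limit" transmitted packets, unload their dmamaps and free
 * the mbufs; returns true if used slots remain on the ring
 */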
1888 static bool
1889 vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1890     struct vioif_txqueue *txq, u_int limit)
1891 {
1892 	struct virtqueue *vq = txq->txq_vq;
1893 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1894 	struct mbuf *m;
1895 	int slot, len;
1896 	bool more = false;
1897 
1898 	KASSERT(mutex_owned(txq->txq_lock));
1899 
1900 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1901 		return false;
1902 
1903 	for (;;) {
1904 		if (limit-- == 0) {
1905 			more = true;
1906 			break;
1907 		}
1908 
1909 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1910 			break;
1911 
1912 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1913 		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1914 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1915 		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1916 		    BUS_DMASYNC_POSTWRITE);
1917 		m = txq->txq_mbufs[slot];
1918 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1919 		txq->txq_mbufs[slot] = NULL;
1920 		virtio_dequeue_commit(vsc, vq, slot);
1921 		if_statinc(ifp, if_opackets);
1922 		m_freem(m);
1923 	}
1924 
1925 	return more;
1926 }
1927 
1928 /* free all the mbufs already put on vq; called from if_stop(disable) */
1929 static void
1930 vioif_tx_drain(struct vioif_txqueue *txq)
1931 {
1932 	struct virtqueue *vq = txq->txq_vq;
1933 	struct virtio_softc *vsc = vq->vq_owner;
1934 	int i;
1935 
1936 	KASSERT(txq->txq_stopping);
1937 
1938 	for (i = 0; i < vq->vq_num; i++) {
1939 		if (txq->txq_mbufs[i] == NULL)
1940 			continue;
1941 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1942 		m_freem(txq->txq_mbufs[i]);
1943 		txq->txq_mbufs[i] = NULL;
1944 	}
1945 }
1946 
1947 /*
1948  * Control vq
1949  */
1950 /* serialize access to the control vq; only one command may be in flight */
1951 static void
1952 vioif_ctrl_acquire(struct vioif_softc *sc)
1953 {
1954 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1955 
1956 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1957 	while (ctrlq->ctrlq_inuse != FREE)
1958 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1959 	ctrlq->ctrlq_inuse = INUSE;
1960 	ctrlq->ctrlq_owner = curlwp;
1961 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1962 }
1963 
1964 static void
1965 vioif_ctrl_release(struct vioif_softc *sc)
1966 {
1967 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1968 
1969 	KASSERT(ctrlq->ctrlq_inuse != FREE);
1970 	KASSERT(ctrlq->ctrlq_owner == curlwp);
1971 
1972 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1973 	ctrlq->ctrlq_inuse = FREE;
1974 	ctrlq->ctrlq_owner = NULL;
1975 	cv_signal(&ctrlq->ctrlq_wait);
1976 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1977 }
1978 
1979 static int
1980 vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1981     struct vioif_ctrl_cmdspec *specs, int nspecs)
1982 {
1983 	struct virtio_softc *vsc = sc->sc_virtio;
1984 	int i, r, loaded;
1985 
1986 	loaded = 0;
1987 	for (i = 0; i < nspecs; i++) {
1988 		r = bus_dmamap_load(virtio_dmat(vsc),
1989 		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1990 		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1991 		if (r) {
1992 			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1993 			goto err;
1994 		}
1995 		loaded++;
1996 
1997 	}
1998 
1999 	return 0;
2000 
2001 err:
2002 	for (i = 0; i < loaded; i++) {
2003 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2004 	}
2005 
2006 	return r;
2007 }
2008 
2009 static void
2010 vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2011     struct vioif_ctrl_cmdspec *specs, int nspecs)
2012 {
2013 	struct virtio_softc *vsc = sc->sc_virtio;
2014 	int i;
2015 
2016 	for (i = 0; i < nspecs; i++) {
2017 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2018 	}
2019 }
2020 
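/*
 * issue a control command: the command header, the caller-supplied buffers
 * and the status buffer are chained into a single descriptor chain, then we
 * sleep until the ctrl vq interrupt marks the command DONE
 */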
2021 static int
2022 vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2023     struct vioif_ctrl_cmdspec *specs, int nspecs)
2024 {
2025 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2026 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2027 	struct virtio_softc *vsc = sc->sc_virtio;
2028 	int i, r, slot;
2029 
2030 	ctrlq->ctrlq_cmd->class = class;
2031 	ctrlq->ctrlq_cmd->command = cmd;
2032 
2033 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2034 	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2035 	for (i = 0; i < nspecs; i++) {
2036 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2037 		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2038 	}
2039 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2040 	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2041 
2042 	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2043 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2044 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2045 
2046 	r = virtio_enqueue_prep(vsc, vq, &slot);
2047 	if (r != 0)
2048 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2049 	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2050 	if (r != 0)
2051 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2052 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2053 	for (i = 0; i < nspecs; i++) {
2054 		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2055 	}
2056 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2057 	virtio_enqueue_commit(vsc, vq, slot, true);
2058 
2059 	/* wait for done */
2060 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2061 	while (ctrlq->ctrlq_inuse != DONE)
2062 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2063 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2064 	/* already dequeueued */
2065 	/* already dequeued */
2066 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2067 	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2068 	for (i = 0; i < nspecs; i++) {
2069 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2070 		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2071 	}
2072 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2073 	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2074 
2075 	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2076 		r = 0;
2077 	else {
2078 		device_printf(sc->sc_dev, "failed setting rx mode\n");
2079 		device_printf(sc->sc_dev, "control command failed\n");
2080 		r = EIO;
2081 	}
2082 
2083 	return r;
2084 }
2085 
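/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */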
2086 static int
2087 vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2088 {
2089 	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2090 	struct vioif_ctrl_cmdspec specs[1];
2091 	int r;
2092 
2093 	if (!sc->sc_has_ctrl)
2094 		return ENOTSUP;
2095 
2096 	vioif_ctrl_acquire(sc);
2097 
2098 	rx->onoff = onoff;
2099 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2100 	specs[0].buf = rx;
2101 	specs[0].bufsize = sizeof(*rx);
2102 
2103 	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2104 	    specs, __arraycount(specs));
2105 
2106 	vioif_ctrl_release(sc);
2107 	return r;
2108 }
2109 
2110 static int
2111 vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2112 {
2113 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2114 }
2115 
2116 static int
2117 vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2118 {
2119 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2120 }
2121 
2122 /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2123 static int
2124 vioif_set_rx_filter(struct vioif_softc *sc)
2125 {
2126 	/* filter already set in ctrlq->ctrlq_mac_tbl */
2127 	struct virtio_softc *vsc = sc->sc_virtio;
2128 	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2129 	struct vioif_ctrl_cmdspec specs[2];
2130 	int nspecs = __arraycount(specs);
2131 	int r;
2132 
2133 	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2134 	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2135 
2136 	if (!sc->sc_has_ctrl)
2137 		return ENOTSUP;
2138 
2139 	vioif_ctrl_acquire(sc);
2140 
2141 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2142 	specs[0].buf = mac_tbl_uc;
2143 	specs[0].bufsize = sizeof(*mac_tbl_uc)
2144 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2145 
2146 	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2147 	specs[1].buf = mac_tbl_mc;
2148 	specs[1].bufsize = sizeof(*mac_tbl_mc)
2149 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2150 
2151 	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2152 	if (r != 0)
2153 		goto out;
2154 
2155 	r = vioif_ctrl_send_command(sc,
2156 	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2157 	    specs, nspecs);
2158 
2159 	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2160 
2161 out:
2162 	vioif_ctrl_release(sc);
2163 
2164 	return r;
2165 }
2166 
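/*
 * set the MAC address: via a VIRTIO_NET_CTRL_MAC_ADDR_SET command when the
 * host offers VIRTIO_NET_F_CTRL_MAC_ADDR, otherwise by writing config space
 */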
2167 static int
2168 vioif_set_mac_addr(struct vioif_softc *sc)
2169 {
2170 	struct virtio_net_ctrl_mac_addr *ma =
2171 	    sc->sc_ctrlq.ctrlq_mac_addr;
2172 	struct vioif_ctrl_cmdspec specs[1];
2173 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2174 	int nspecs = __arraycount(specs);
2175 	uint64_t features;
2176 	int r;
2177 	size_t i;
2178 
2179 	if (!sc->sc_has_ctrl)
2180 		return ENOTSUP;
2181 
2182 	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2183 	    ETHER_ADDR_LEN) == 0) {
2184 		return 0;
2185 	}
2186 
2187 	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2188 
2189 	features = virtio_features(sc->sc_virtio);
2190 	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2191 		vioif_ctrl_acquire(sc);
2192 
2193 		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2194 		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2195 		specs[0].buf = ma;
2196 		specs[0].bufsize = sizeof(*ma);
2197 
2198 		r = vioif_ctrl_send_command(sc,
2199 		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2200 		    specs, nspecs);
2201 
2202 		vioif_ctrl_release(sc);
2203 	} else {
2204 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2205 			virtio_write_device_config_1(sc->sc_virtio,
2206 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2207 		}
2208 		r = 0;
2209 	}
2210 
2211 	return r;
2212 }
2213 
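/* tell the device how many virtqueue pairs to activate */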
2214 static int
2215 vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2216 {
2217 	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2218 	struct vioif_ctrl_cmdspec specs[1];
2219 	int r;
2220 
2221 	if (!sc->sc_has_ctrl)
2222 		return ENOTSUP;
2223 
2224 	if (nvq_pairs <= 1)
2225 		return EINVAL;
2226 
2227 	vioif_ctrl_acquire(sc);
2228 
2229 	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2230 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2231 	specs[0].buf = mq;
2232 	specs[0].bufsize = sizeof(*mq);
2233 
2234 	r = vioif_ctrl_send_command(sc,
2235 	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2236 	    specs, __arraycount(specs));
2237 
2238 	vioif_ctrl_release(sc);
2239 
2240 	return r;
2241 }
2242 
2243 /* ctrl vq interrupt; wake up the command issuer */
2244 static int
2245 vioif_ctrl_intr(void *arg)
2246 {
2247 	struct vioif_ctrlqueue *ctrlq = arg;
2248 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2249 	struct virtio_softc *vsc = vq->vq_owner;
2250 	int r, slot;
2251 
2252 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2253 		return 0;
2254 
2255 	r = virtio_dequeue(vsc, vq, &slot, NULL);
2256 	if (r == ENOENT)
2257 		return 0;
2258 	virtio_dequeue_commit(vsc, vq, slot);
2259 
2260 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2261 	ctrlq->ctrlq_inuse = DONE;
2262 	cv_signal(&ctrlq->ctrlq_wait);
2263 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2264 
2265 	return 1;
2266 }
2267 
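/*
 * sync IFF_PROMISC and IFF_ALLMULTI with the device; without a ctrl vq
 * both are forced on
 */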
2268 static int
2269 vioif_ifflags(struct vioif_softc *sc)
2270 {
2271 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2272 	bool onoff;
2273 	int r;
2274 
2275 	if (!sc->sc_has_ctrl) {
2276 		/* no ctrl vq; always promisc and allmulti */
2277 		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2278 		return 0;
2279 	}
2280 
2281 	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2282 	r = vioif_set_allmulti(sc, onoff);
2283 	if (r != 0) {
2284 		log(LOG_WARNING,
2285 		    "%s: couldn't %sable ALLMULTI\n",
2286 		    ifp->if_xname, onoff ? "en" : "dis");
2287 		if (onoff == false) {
2288 			ifp->if_flags |= IFF_ALLMULTI;
2289 		}
2290 	}
2291 
2292 	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2293 	r = vioif_set_promisc(sc, onoff);
2294 	if (r != 0) {
2295 		log(LOG_WARNING,
2296 		    "%s: couldn't %sable PROMISC\n",
2297 		    ifp->if_xname, onoff ? "en" : "dis");
2298 		if (onoff == false) {
2299 			ifp->if_flags |= IFF_PROMISC;
2300 		}
2301 	}
2302 
2303 	return 0;
2304 }
2305 
2306 static int
2307 vioif_ifflags_cb(struct ethercom *ec)
2308 {
2309 	struct ifnet *ifp = &ec->ec_if;
2310 	struct vioif_softc *sc = ifp->if_softc;
2311 
2312 	return vioif_ifflags(sc);
2313 }
2314 
2315 /*
2316  * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter.
2317  * If the multicast filter is too large, use ALLMULTI instead.
2318  * If programming the rx filter fails, fall back to ALLMULTI.
2319  */
2320 static int
2321 vioif_rx_filter(struct vioif_softc *sc)
2322 {
2323 	struct virtio_softc *vsc = sc->sc_virtio;
2324 	struct ethercom *ec = &sc->sc_ethercom;
2325 	struct ifnet *ifp = &ec->ec_if;
2326 	struct ether_multi *enm;
2327 	struct ether_multistep step;
2328 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2329 	int nentries;
2330 	bool allmulti = 0;
2331 	bool allmulti = false;
2332 
2333 	if (!sc->sc_has_ctrl) {
2334 		goto set_ifflags;
2335 	}
2336 
2337 	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2338 	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2339 
2340 	nentries = 0;
2341 	allmulti = false;
2342 
2343 	ETHER_LOCK(ec);
2344 	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2345 	    ETHER_NEXT_MULTI(step, enm)) {
2346 		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2347 			allmulti = true;
2348 			break;
2349 		}
2350 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2351 			allmulti = true;
2352 			break;
2353 		}
2354 
2355 		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2356 		    enm->enm_addrlo, ETHER_ADDR_LEN);
2357 		nentries++;
2358 	}
2359 	ETHER_UNLOCK(ec);
2360 
2361 	r = vioif_set_mac_addr(sc);
2362 	if (r != 0) {
2363 		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2364 		    ifp->if_xname);
2365 	}
2366 
2367 	if (!allmulti) {
2368 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2369 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2370 		r = vioif_set_rx_filter(sc);
2371 		if (r != 0) {
2372 			allmulti = true; /* fallback */
2373 		}
2374 	}
2375 
2376 	if (allmulti) {
2377 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2378 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2379 		r = vioif_set_rx_filter(sc);
2380 		if (r != 0) {
2381 			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2382 			    ifp->if_xname);
2383 			/* what to do on failure? */
2384 		}
2385 
2386 		ifp->if_flags |= IFF_ALLMULTI;
2387 	}
2388 
2389 set_ifflags:
2390 	r = vioif_ifflags(sc);
2391 
2392 	return r;
2393 }
2394 
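/* read the link state from config space; assume link up without VIRTIO_NET_F_STATUS */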
2395 static int
2396 vioif_get_link_status(struct vioif_softc *sc)
2397 {
2398 	struct virtio_softc *vsc = sc->sc_virtio;
2399 	uint16_t status;
2400 
2401 	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2402 		status = virtio_read_device_config_2(vsc,
2403 		    VIRTIO_NET_CONFIG_STATUS);
2404 	else
2405 		status = VIRTIO_NET_S_LINK_UP;
2406 
2407 	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2408 		return LINK_STATE_UP;
2409 
2410 	return LINK_STATE_DOWN;
2411 }
2412 
2413 /* change link status */
2414 static void
2415 vioif_update_link_status(struct vioif_softc *sc)
2416 {
2417 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2418 	struct vioif_txqueue *txq;
2419 	bool active;
2420 	int link, i;
2421 
2422 	mutex_enter(&sc->sc_lock);
2423 
2424 	link = vioif_get_link_status(sc);
2425 
2426 	if (link == sc->sc_link_state)
2427 		goto done;
2428 
2429 	sc->sc_link_state = link;
2430 
2431 	active = VIOIF_IS_LINK_ACTIVE(sc);
2432 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2433 		txq = &sc->sc_txq[i];
2434 
2435 		mutex_enter(txq->txq_lock);
2436 		txq->txq_link_active = active;
2437 		mutex_exit(txq->txq_lock);
2438 	}
2439 
2440 	if_link_state_change(ifp, sc->sc_link_state);
2441 
2442 done:
2443 	mutex_exit(&sc->sc_lock);
2444 }
2445 
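/* config change interrupt: defer the work to the control softint */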
2446 static int
2447 vioif_config_change(struct virtio_softc *vsc)
2448 {
2449 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2450 
2451 	softint_schedule(sc->sc_ctl_softint);
2452 	return 0;
2453 }
2454 
2455 static void
2456 vioif_ctl_softint(void *arg)
2457 {
2458 	struct vioif_softc *sc = arg;
2459 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2460 
2461 	vioif_update_link_status(sc);
2462 	vioif_start(ifp);
2463 }
2464 
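/*
 * workqueue helpers: vioif_work_add() enqueues a vioif_work at most once at
 * a time, guarded by its "added" flag
 */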
2465 static struct workqueue *
2466 vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2467 {
2468 	struct workqueue *wq;
2469 	int error;
2470 
2471 	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2472 	    prio, ipl, flags);
2473 
2474 	if (error)
2475 		return NULL;
2476 
2477 	return wq;
2478 }
2479 
2480 static void
2481 vioif_workq_destroy(struct workqueue *wq)
2482 {
2483 
2484 	workqueue_destroy(wq);
2485 }
2486 
2487 static void
2488 vioif_workq_work(struct work *wk, void *context)
2489 {
2490 	struct vioif_work *work;
2491 
2492 	work = container_of(wk, struct vioif_work, cookie);
2493 
2494 	atomic_store_relaxed(&work->added, 0);
2495 	work->func(work->arg);
2496 }
2497 
2498 static void
2499 vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2500 {
2501 
2502 	memset(work, 0, sizeof(*work));
2503 	work->func = func;
2504 	work->arg = arg;
2505 }
2506 
2507 static void
2508 vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2509 {
2510 
2511 	if (atomic_load_relaxed(&work->added) != 0)
2512 		return;
2513 
2514 	atomic_store_relaxed(&work->added, 1);
2515 	kpreempt_disable();
2516 	workqueue_enqueue(wq, &work->cookie, NULL);
2517 	kpreempt_enable();
2518 }
2519 
2520 static void
2521 vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2522 {
2523 
2524 	workqueue_wait(wq, &work->cookie);
2525 }
2526 
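/* create the per-device sysctl tree under hw.<devname> */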
2527 static int
2528 vioif_setup_sysctl(struct vioif_softc *sc)
2529 {
2530 	const char *devname;
2531 	struct sysctllog **log;
2532 	const struct sysctlnode *rnode, *rxnode, *txnode;
2533 	int error;
2534 
2535 	log = &sc->sc_sysctllog;
2536 	devname = device_xname(sc->sc_dev);
2537 
2538 	error = sysctl_createv(log, 0, NULL, &rnode,
2539 	    0, CTLTYPE_NODE, devname,
2540 	    SYSCTL_DESCR("virtio-net information and settings"),
2541 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2542 	if (error)
2543 		goto out;
2544 
2545 	error = sysctl_createv(log, 0, &rnode, NULL,
2546 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2547 	    SYSCTL_DESCR("Use workqueue for packet processing"),
2548 	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2549 	if (error)
2550 		goto out;
2551 
2552 	error = sysctl_createv(log, 0, &rnode, &rxnode,
2553 	    0, CTLTYPE_NODE, "rx",
2554 	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2555 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2556 	if (error)
2557 		goto out;
2558 
2559 	error = sysctl_createv(log, 0, &rxnode, NULL,
2560 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2561 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2562 	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2563 	if (error)
2564 		goto out;
2565 
2566 	error = sysctl_createv(log, 0, &rxnode, NULL,
2567 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2568 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2569 	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2570 	if (error)
2571 		goto out;
2572 
2573 	error = sysctl_createv(log, 0, &rnode, &txnode,
2574 	    0, CTLTYPE_NODE, "tx",
2575 	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2576 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2577 	if (error)
2578 		goto out;
2579 
2580 	error = sysctl_createv(log, 0, &txnode, NULL,
2581 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2582 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2583 	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2584 	if (error)
2585 		goto out;
2586 
2587 	error = sysctl_createv(log, 0, &txnode, NULL,
2588 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2589 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2590 	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2591 
2592 out:
2593 	if (error)
2594 		sysctl_teardown(log);
2595 
2596 	return error;
2597 }
2598 
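/* attach event counters for the tx/rx queues and the control queue */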
2599 static void
2600 vioif_setup_stats(struct vioif_softc *sc)
2601 {
2602 	struct vioif_rxqueue *rxq;
2603 	struct vioif_txqueue *txq;
2604 	int i;
2605 
2606 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2607 		rxq = &sc->sc_rxq[i];
2608 		txq = &sc->sc_txq[i];
2609 
2610 		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2611 		    device_xname(sc->sc_dev), i);
2612 		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2613 		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2614 		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2615 		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2616 		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2617 		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2618 
2619 		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2620 		    device_xname(sc->sc_dev), i);
2621 		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2622 		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2623 	}
2624 
2625 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2626 	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2627 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2628 	    NULL, device_xname(sc->sc_dev), "control command failed");
2629 }
2630 
2631 MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2632 
2633 #ifdef _MODULE
2634 #include "ioconf.c"
2635 #endif
2636 
2637 static int
2638 if_vioif_modcmd(modcmd_t cmd, void *opaque)
2639 {
2640 	int error = 0;
2641 
2642 #ifdef _MODULE
2643 	switch (cmd) {
2644 	case MODULE_CMD_INIT:
2645 		error = config_init_component(cfdriver_ioconf_if_vioif,
2646 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2647 		break;
2648 	case MODULE_CMD_FINI:
2649 		error = config_fini_component(cfdriver_ioconf_if_vioif,
2650 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2651 		break;
2652 	default:
2653 		error = ENOTTY;
2654 		break;
2655 	}
2656 #endif
2657 
2658 	return error;
2659 }
2660