1 /*	$NetBSD: if_vioif.c,v 1.113 2024/11/10 11:46:11 mlelstv Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.113 2024/11/10 11:46:11 mlelstv Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_net_mpsafe.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/atomic.h>
40 #include <sys/bus.h>
41 #include <sys/condvar.h>
42 #include <sys/device.h>
43 #include <sys/evcnt.h>
44 #include <sys/intr.h>
45 #include <sys/kmem.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/sockio.h>
49 #include <sys/syslog.h>
50 #include <sys/cpu.h>
51 #include <sys/module.h>
52 #include <sys/pcq.h>
53 #include <sys/workqueue.h>
54 #include <sys/xcall.h>
55 
56 #include <dev/pci/virtioreg.h>
57 #include <dev/pci/virtiovar.h>
58 
59 #include <net/if.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_ether.h>
63 
64 #include <net/bpf.h>
65 
66 #include "ioconf.h"
67 
68 #ifdef NET_MPSAFE
69 #define VIOIF_MPSAFE	1
70 #define VIOIF_MULTIQ	1
71 #endif
72 
73 /*
74  * if_vioifreg.h:
75  */
76 /* Configuration registers */
77 #define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78 #define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80 #define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81 
82 /* Feature bits */
83 #define VIRTIO_NET_F_CSUM		__BIT(0)
84 #define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85 #define VIRTIO_NET_F_MAC		__BIT(5)
86 #define VIRTIO_NET_F_GSO		__BIT(6)
87 #define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88 #define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89 #define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90 #define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91 #define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92 #define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93 #define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94 #define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95 #define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96 #define VIRTIO_NET_F_STATUS		__BIT(16)
97 #define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98 #define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99 #define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100 #define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101 #define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102 #define VIRTIO_NET_F_MQ			__BIT(22)
103 #define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104 
105 #define VIRTIO_NET_FLAG_BITS			\
106 	VIRTIO_COMMON_FLAG_BITS			\
107 	"b\x17" "CTRL_MAC\0"			\
108 	"b\x16" "MQ\0"				\
109 	"b\x15" "GUEST_ANNOUNCE\0"		\
110 	"b\x14" "CTRL_RX_EXTRA\0"		\
111 	"b\x13" "CTRL_VLAN\0"			\
112 	"b\x12" "CTRL_RX\0"			\
113 	"b\x11" "CTRL_VQ\0"			\
114 	"b\x10" "STATUS\0"			\
115 	"b\x0f" "MRG_RXBUF\0"			\
116 	"b\x0e" "HOST_UFO\0"			\
117 	"b\x0d" "HOST_ECN\0"			\
118 	"b\x0c" "HOST_TSO6\0"			\
119 	"b\x0b" "HOST_TSO4\0"			\
120 	"b\x0a" "GUEST_UFO\0"			\
121 	"b\x09" "GUEST_ECN\0"			\
122 	"b\x08" "GUEST_TSO6\0"			\
123 	"b\x07" "GUEST_TSO4\0"			\
124 	"b\x06" "GSO\0"				\
125 	"b\x05" "MAC\0"				\
126 	"b\x01" "GUEST_CSUM\0"			\
127 	"b\x00" "CSUM\0"
128 
129 /* Status */
130 #define VIRTIO_NET_S_LINK_UP	1
131 
132 /* Packet header structure */
133 struct virtio_net_hdr {
134 	uint8_t		flags;
135 	uint8_t		gso_type;
136 	uint16_t	hdr_len;
137 	uint16_t	gso_size;
138 	uint16_t	csum_start;
139 	uint16_t	csum_offset;
140 
141 	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142 } __packed;
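/*
 * Devices that negotiated neither VIRTIO_NET_F_MRG_RXBUF nor
 * VIRTIO_F_VERSION_1 use only the first 10 bytes of this header (without
 * num_buffers); see the sc_hdr_size selection in vioif_attach().
 */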
143 
144 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150 
151 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152 
153 /* Control virtqueue */
154 struct virtio_net_ctrl_cmd {
155 	uint8_t	class;
156 	uint8_t	command;
157 } __packed;
158 #define VIRTIO_NET_CTRL_RX		0
159 # define VIRTIO_NET_CTRL_RX_PROMISC	0
160 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161 
162 #define VIRTIO_NET_CTRL_MAC		1
163 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164 # define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165 
166 #define VIRTIO_NET_CTRL_VLAN		2
167 # define VIRTIO_NET_CTRL_VLAN_ADD	0
168 # define VIRTIO_NET_CTRL_VLAN_DEL	1
169 
170 #define VIRTIO_NET_CTRL_MQ			4
171 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174 
175 struct virtio_net_ctrl_status {
176 	uint8_t	ack;
177 } __packed;
178 #define VIRTIO_NET_OK			0
179 #define VIRTIO_NET_ERR			1
180 
181 struct virtio_net_ctrl_rx {
182 	uint8_t	onoff;
183 } __packed;
184 
185 struct virtio_net_ctrl_mac_tbl {
186 	uint32_t nentries;
187 	uint8_t macs[][ETHER_ADDR_LEN];
188 } __packed;
189 
190 struct virtio_net_ctrl_mac_addr {
191 	uint8_t mac[ETHER_ADDR_LEN];
192 } __packed;
193 
194 struct virtio_net_ctrl_vlan {
195 	uint16_t id;
196 } __packed;
197 
198 struct virtio_net_ctrl_mq {
199 	uint16_t virtqueue_pairs;
200 } __packed;
201 
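/*
 * A control virtqueue request is a descriptor chain of the class/command
 * header plus the command-specific payload (both device-readable) followed
 * by a single ack byte (device-writable); this matches the
 * BUS_DMA_WRITE/BUS_DMA_READ choices when the control dmamaps are loaded
 * in vioif_alloc_mems().
 */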
202 /*
203  * if_vioifvar.h:
204  */
205 
206 /*
207  * Locking notes:
208  * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
209  *      - no more than one netq_lock may be held at once
210  * + a field in vioif_tx_context and vioif_rx_context is also protected
211  *   by netq_lock.
212  * + ctrlq_inuse is protected by ctrlq_wait_lock.
213  *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214  *      - netq_lock cannot be held along with ctrlq_wait_lock
215  * + fields in vioif_softc other than the queues are protected by
216  *   sc->sc_lock (an adaptive mutex)
217  *      - sc_lock is acquired before any of the other locks
218  */
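/*
 * Lock ordering sketch (illustrative only, not part of the driver):
 *
 *	mutex_enter(&sc->sc_lock);	(adaptive mutex; taken first)
 *	mutex_enter(&netq->netq_lock);	(spin mutex; at most one at a time)
 *	... access vioif_netqueue / vioif_tx_context / vioif_rx_context ...
 *	mutex_exit(&netq->netq_lock);
 *	mutex_exit(&sc->sc_lock);
 *
 * netq_lock and ctrlq_wait_lock are never held at the same time.
 */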
219 
220 struct vioif_ctrl_cmdspec {
221 	bus_dmamap_t	dmamap;
222 	void		*buf;
223 	bus_size_t	bufsize;
224 };
225 
226 struct vioif_work {
227 	struct work	 cookie;
228 	void		(*func)(void *);
229 	void		*arg;
230 	unsigned int	 added;
231 };
232 
233 struct vioif_net_map {
234 	struct virtio_net_hdr	*vnm_hdr;
235 	bus_dmamap_t		 vnm_hdr_map;
236 	struct mbuf		*vnm_mbuf;
237 	bus_dmamap_t		 vnm_mbuf_map;
238 };
239 
240 #define VIOIF_NETQ_RX		0
241 #define VIOIF_NETQ_TX		1
242 #define VIOIF_NETQ_IDX		2
243 #define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244 #define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245 #define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246 #define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
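/*
 * Worked example of the mapping above: RX and TX of pair n occupy adjacent
 * indices 2n and 2n + 1, so with two queue pairs sc_vqs/sc_netqs hold
 *	[0] rx0  [1] tx0  [2] rx1  [3] tx1
 * and the control virtqueue, if negotiated, is appended after them (see
 * vioif_attach() and vioif_alloc_queues()).
 */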
247 
248 struct vioif_netqueue {
249 	kmutex_t		 netq_lock;
250 	struct virtqueue	*netq_vq;
251 	bool			 netq_stopping;
252 	bool			 netq_running_handle;
253 	void			*netq_maps_kva;
254 	struct vioif_net_map	*netq_maps;
255 
256 	void			*netq_softint;
257 	struct vioif_work	 netq_work;
258 	bool			 netq_workqueue;
259 
260 	char			 netq_evgroup[32];
261 	struct evcnt		 netq_mbuf_load_failed;
262 	struct evcnt		 netq_enqueue_failed;
263 
264 	void			*netq_ctx;
265 };
266 
267 struct vioif_tx_context {
268 	bool			 txc_link_active;
269 	bool			 txc_no_free_slots;
270 	pcq_t			*txc_intrq;
271 	void			*txc_deferred_transmit;
272 
273 	struct evcnt		 txc_defrag_failed;
274 };
275 
276 struct vioif_rx_context {
277 	struct evcnt		 rxc_mbuf_enobufs;
278 };
279 struct vioif_ctrlqueue {
280 	struct virtqueue		*ctrlq_vq;
281 	enum {
282 		FREE, INUSE, DONE
283 	}				ctrlq_inuse;
284 	kcondvar_t			ctrlq_wait;
285 	kmutex_t			ctrlq_wait_lock;
286 	struct lwp			*ctrlq_owner;
287 
288 	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289 	struct virtio_net_ctrl_status	*ctrlq_status;
290 	struct virtio_net_ctrl_rx	*ctrlq_rx;
291 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293 	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294 	struct virtio_net_ctrl_mq	*ctrlq_mq;
295 
296 	bus_dmamap_t			ctrlq_cmd_dmamap;
297 	bus_dmamap_t			ctrlq_status_dmamap;
298 	bus_dmamap_t			ctrlq_rx_dmamap;
299 	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300 	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301 	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302 	bus_dmamap_t			ctrlq_mq_dmamap;
303 
304 	struct evcnt			ctrlq_cmd_load_failed;
305 	struct evcnt			ctrlq_cmd_failed;
306 };
307 
308 struct vioif_softc {
309 	device_t		sc_dev;
310 	kmutex_t		sc_lock;
311 	struct sysctllog	*sc_sysctllog;
312 
313 	struct virtio_softc	*sc_virtio;
314 	struct virtqueue	*sc_vqs;
315 	u_int			 sc_hdr_size;
316 
317 	int			sc_max_nvq_pairs;
318 	int			sc_req_nvq_pairs;
319 	int			sc_act_nvq_pairs;
320 
321 	uint8_t			sc_mac[ETHER_ADDR_LEN];
322 	struct ethercom		sc_ethercom;
323 	int			sc_link_state;
324 
325 	struct vioif_netqueue	*sc_netqs;
326 
327 	bool			sc_has_ctrl;
328 	struct vioif_ctrlqueue	sc_ctrlq;
329 
330 	bus_dma_segment_t	 sc_segs[1];
331 	void			*sc_dmamem;
332 	void			*sc_kmem;
333 
334 	void			*sc_cfg_softint;
335 
336 	struct workqueue	*sc_txrx_workqueue;
337 	bool			 sc_txrx_workqueue_sysctl;
338 	u_int			 sc_tx_intr_process_limit;
339 	u_int			 sc_tx_process_limit;
340 	u_int			 sc_rx_intr_process_limit;
341 	u_int			 sc_rx_process_limit;
342 };
343 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345 
346 #define VIOIF_TX_INTR_PROCESS_LIMIT	256
347 #define VIOIF_TX_PROCESS_LIMIT		256
348 #define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349 #define VIOIF_RX_PROCESS_LIMIT		256
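/*
 * A limit of 0 makes the dequeue loop defer immediately, so the default
 * VIOIF_RX_INTR_PROCESS_LIMIT of 0 means the hard interrupt handler does no
 * RX processing itself and leaves all of it to the softint/workqueue.
 */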
350 
351 #define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352 #define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353 					    true : false)
354 
355 /* cfattach interface functions */
356 static int	vioif_match(device_t, cfdata_t, void *);
357 static void	vioif_attach(device_t, device_t, void *);
358 static int	vioif_finalize_teardown(device_t);
359 
360 /* ifnet interface functions */
361 static int	vioif_init(struct ifnet *);
362 static void	vioif_stop(struct ifnet *, int);
363 static void	vioif_start(struct ifnet *);
364 static int	vioif_transmit(struct ifnet *, struct mbuf *);
365 static int	vioif_ioctl(struct ifnet *, u_long, void *);
366 static void	vioif_watchdog(struct ifnet *);
367 static int	vioif_ifflags(struct vioif_softc *);
368 static int	vioif_ifflags_cb(struct ethercom *);
369 
370 /* tx & rx */
371 static int	vioif_netqueue_init(struct vioif_softc *,
372 		    struct virtio_softc *, size_t, u_int);
373 static void	vioif_netqueue_teardown(struct vioif_softc *,
374 		    struct virtio_softc *, size_t);
375 static void	vioif_net_intr_enable(struct vioif_softc *,
376 		    struct virtio_softc *);
377 static void	vioif_net_intr_disable(struct vioif_softc *,
378 		    struct virtio_softc *);
379 static void	vioif_net_sched_handle(struct vioif_softc *,
380 		    struct vioif_netqueue *);
381 
382 /* rx */
383 static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384 		    struct vioif_netqueue *);
385 static int	vioif_rx_intr(void *);
386 static void	vioif_rx_handle(void *);
387 static void	vioif_rx_queue_clear(struct vioif_softc *,
388 		    struct virtio_softc *, struct vioif_netqueue *);
389 
390 /* tx */
391 static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392 static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393 static void	vioif_deferred_transmit(void *);
394 static int	vioif_tx_intr(void *);
395 static void	vioif_tx_handle(void *);
396 static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397 		    struct vioif_netqueue *);
398 
399 /* controls */
400 static int	vioif_ctrl_intr(void *);
401 static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402 static int	vioif_set_promisc(struct vioif_softc *, bool);
403 static int	vioif_set_allmulti(struct vioif_softc *, bool);
404 static int	vioif_set_rx_filter(struct vioif_softc *);
405 static int	vioif_rx_filter(struct vioif_softc *);
406 static int	vioif_set_mac_addr(struct vioif_softc *);
407 static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408 
409 /* config interrupt */
410 static int	vioif_config_change(struct virtio_softc *);
411 static void	vioif_cfg_softint(void *);
412 static void	vioif_update_link_status(struct vioif_softc *);
413 
414 /* others */
415 static void	vioif_alloc_queues(struct vioif_softc *);
416 static void	vioif_free_queues(struct vioif_softc *);
417 static int	vioif_alloc_mems(struct vioif_softc *);
418 static struct workqueue*
419 		vioif_workq_create(const char *, pri_t, int, int);
420 static void	vioif_workq_destroy(struct workqueue *);
421 static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422 static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423 static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424 static int	vioif_setup_sysctl(struct vioif_softc *);
425 static void	vioif_setup_stats(struct vioif_softc *);
426 
427 CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428 		  vioif_match, vioif_attach, NULL, NULL);
429 
430 static void
431 vioif_intr_barrier(void)
432 {
433 
434 	/* wait for all interrupt handlers to finish */
435 	xc_barrier(0);
436 }
437 
438 static void
439 vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440 {
441 
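	/*
	 * A notification-only commit: slot -1 publishes no new slot and
	 * just kicks the host for slots committed earlier with notify
	 * set to false.
	 */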
442 	virtio_enqueue_commit(vsc, vq, -1, true);
443 }
444 
445 static int
446 vioif_match(device_t parent, cfdata_t match, void *aux)
447 {
448 	struct virtio_attach_args *va = aux;
449 
450 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451 		return 1;
452 
453 	return 0;
454 }
455 
456 static void
457 vioif_attach(device_t parent, device_t self, void *aux)
458 {
459 	struct vioif_softc *sc = device_private(self);
460 	struct virtio_softc *vsc = device_private(parent);
461 	struct vioif_netqueue *txq0;
462 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463 	uint64_t features, req_features;
464 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465 	u_int softint_flags;
466 	int r, i, req_flags;
467 	char xnamebuf[MAXCOMLEN];
468 	size_t nvqs;
469 
470 	if (virtio_child(vsc) != NULL) {
471 		aprint_normal(": child already attached for %s; "
472 		    "something wrong...\n", device_xname(parent));
473 		return;
474 	}
475 
476 	sc->sc_dev = self;
477 	sc->sc_virtio = vsc;
478 	sc->sc_link_state = LINK_STATE_UNKNOWN;
479 
480 	sc->sc_max_nvq_pairs = 1;
481 	sc->sc_req_nvq_pairs = 1;
482 	sc->sc_act_nvq_pairs = 1;
483 	sc->sc_txrx_workqueue_sysctl = true;
484 	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485 	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486 	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487 	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488 
489 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490 
491 	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492 	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493 	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494 	if (sc->sc_txrx_workqueue == NULL)
495 		goto err;
496 
497 	req_flags = 0;
498 
499 #ifdef VIOIF_MPSAFE
500 	req_flags |= VIRTIO_F_INTR_MPSAFE;
501 #endif
502 	req_flags |= VIRTIO_F_INTR_MSIX;
503 
504 	req_features =
505 	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506 	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507 	req_features |= VIRTIO_F_RING_EVENT_IDX;
508 	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509 #ifdef VIOIF_MULTIQ
510 	req_features |= VIRTIO_NET_F_MQ;
511 #endif
512 
513 	virtio_child_attach_start(vsc, self, IPL_NET,
514 	    req_features, VIRTIO_NET_FLAG_BITS);
515 	features = virtio_features(vsc);
516 
517 	if (features == 0)
518 		goto err;
519 
520 	if (features & VIRTIO_NET_F_MAC) {
521 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522 			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523 			    VIRTIO_NET_CONFIG_MAC + i);
524 		}
525 	} else {
526 		/* code stolen from sys/net/if_tap.c */
527 		struct timeval tv;
528 		uint32_t ui;
529 		getmicrouptime(&tv);
530 		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531 		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533 			virtio_write_device_config_1(vsc,
534 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535 		}
536 	}
537 
538 	/* 'Ethernet' capitalized to match other Ethernet drivers' attach output */
539 	aprint_normal_dev(self, "Ethernet address %s\n",
540 	    ether_sprintf(sc->sc_mac));
541 
542 	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544 	} else {
545 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546 	}
547 
548 	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549 	    (features & VIRTIO_NET_F_CTRL_RX)) {
550 		sc->sc_has_ctrl = true;
551 
552 		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553 		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554 		ctrlq->ctrlq_inuse = FREE;
555 	} else {
556 		sc->sc_has_ctrl = false;
557 	}
558 
559 	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560 		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561 		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562 
563 		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564 			goto err;
565 
566 		/* Limit the number of queue pairs to use */
567 		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568 
569 		if (sc->sc_max_nvq_pairs > 1)
570 			req_flags |= VIRTIO_F_INTR_PERVQ;
571 	}
572 
573 	vioif_alloc_queues(sc);
574 
575 #ifdef VIOIF_MPSAFE
576 	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577 #else
578 	softint_flags = SOFTINT_NET;
579 #endif
580 
581 	/*
582 	 * Initialize network queues
583 	 */
584 	nvqs = sc->sc_max_nvq_pairs * 2;
585 	for (i = 0; i < nvqs; i++) {
586 		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587 		if (r != 0)
588 			goto err;
589 	}
590 
591 	if (sc->sc_has_ctrl) {
592 		int ctrlq_idx = nvqs;
593 
594 		nvqs++;
595 		/*
596 		 * Allocate a virtqueue for the control channel
597 		 */
598 		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599 		virtio_init_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600 		    vioif_ctrl_intr, ctrlq);
601 
602 		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, NBPG, 1, "control");
603 		if (r != 0) {
604 			aprint_error_dev(self, "failed to allocate "
605 			    "a virtqueue for control channel, error code %d\n",
606 			    r);
607 
608 			sc->sc_has_ctrl = false;
609 			cv_destroy(&ctrlq->ctrlq_wait);
610 			mutex_destroy(&ctrlq->ctrlq_wait_lock);
611 		}
612 	}
613 
614 	sc->sc_cfg_softint = softint_establish(softint_flags,
615 	    vioif_cfg_softint, sc);
616 	if (sc->sc_cfg_softint == NULL) {
617 		aprint_error_dev(self, "cannot establish ctl softint\n");
618 		goto err;
619 	}
620 
621 	if (vioif_alloc_mems(sc) < 0)
622 		goto err;
623 
624 	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
625 	    vioif_config_change, req_flags);
626 	if (r != 0)
627 		goto err;
628 
629 	if (vioif_setup_sysctl(sc) != 0) {
630 		aprint_error_dev(self, "unable to create sysctl node\n");
631 		/* continue */
632 	}
633 
634 	vioif_setup_stats(sc);
635 
636 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
637 	ifp->if_softc = sc;
638 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
639 #ifdef VIOIF_MPSAFE
640 	ifp->if_extflags = IFEF_MPSAFE;
641 #endif
642 	ifp->if_start = vioif_start;
643 	if (sc->sc_req_nvq_pairs > 1)
644 		ifp->if_transmit = vioif_transmit;
645 	ifp->if_ioctl = vioif_ioctl;
646 	ifp->if_init = vioif_init;
647 	ifp->if_stop = vioif_stop;
648 	ifp->if_capabilities = 0;
649 	ifp->if_watchdog = vioif_watchdog;
650 	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
651 	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
652 	IFQ_SET_READY(&ifp->if_snd);
653 
654 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
655 
656 	if_attach(ifp);
657 	if_deferred_start_init(ifp, NULL);
658 	ether_ifattach(ifp, sc->sc_mac);
659 	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
660 
661 	return;
662 
663 err:
664 	nvqs = sc->sc_max_nvq_pairs * 2;
665 	for (i = 0; i < nvqs; i++) {
666 		vioif_netqueue_teardown(sc, vsc, i);
667 	}
668 
669 	if (sc->sc_has_ctrl) {
670 		cv_destroy(&ctrlq->ctrlq_wait);
671 		mutex_destroy(&ctrlq->ctrlq_wait_lock);
672 		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
673 		ctrlq->ctrlq_vq = NULL;
674 	}
675 
676 	vioif_free_queues(sc);
677 	mutex_destroy(&sc->sc_lock);
678 	virtio_child_attach_failed(vsc);
679 	config_finalize_register(self, vioif_finalize_teardown);
680 
681 	return;
682 }
683 
684 static int
685 vioif_finalize_teardown(device_t self)
686 {
687 	struct vioif_softc *sc = device_private(self);
688 
689 	if (sc->sc_txrx_workqueue != NULL) {
690 		vioif_workq_destroy(sc->sc_txrx_workqueue);
691 		sc->sc_txrx_workqueue = NULL;
692 	}
693 
694 	return 0;
695 }
696 
697 /*
698  * Interface functions for ifnet
699  */
700 static int
701 vioif_init(struct ifnet *ifp)
702 {
703 	struct vioif_softc *sc = ifp->if_softc;
704 	struct virtio_softc *vsc = sc->sc_virtio;
705 	struct vioif_netqueue *netq;
706 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
707 	int r, i;
708 
709 	vioif_stop(ifp, 0);
710 
711 	r = virtio_reinit_start(vsc);
712 	if (r != 0) {
713 		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
714 		return EIO;
715 	}
716 
717 	virtio_negotiate_features(vsc, virtio_features(vsc));
718 
719 	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
720 		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
721 
722 		mutex_enter(&netq->netq_lock);
723 		vioif_populate_rx_mbufs_locked(sc, netq);
724 		mutex_exit(&netq->netq_lock);
725 	}
726 
727 	virtio_reinit_end(vsc);
728 
729 	if (sc->sc_has_ctrl)
730 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
731 
732 	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
733 	if (r == 0)
734 		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
735 	else
736 		sc->sc_act_nvq_pairs = 1;
737 
738 	SET(ifp->if_flags, IFF_RUNNING);
739 
740 	vioif_net_intr_enable(sc, vsc);
741 
742 	vioif_update_link_status(sc);
743 	r = vioif_rx_filter(sc);
744 
745 	return r;
746 }
747 
748 static void
749 vioif_stop(struct ifnet *ifp, int disable)
750 {
751 	struct vioif_softc *sc = ifp->if_softc;
752 	struct virtio_softc *vsc = sc->sc_virtio;
753 	struct vioif_netqueue *netq;
754 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
755 	size_t i, act_qnum;
756 
757 	act_qnum = sc->sc_act_nvq_pairs * 2;
758 
759 	CLR(ifp->if_flags, IFF_RUNNING);
760 	for (i = 0; i < act_qnum; i++) {
761 		netq = &sc->sc_netqs[i];
762 
763 		mutex_enter(&netq->netq_lock);
764 		netq->netq_stopping = true;
765 		mutex_exit(&netq->netq_lock);
766 	}
767 
768 	/* disable interrupts */
769 	vioif_net_intr_disable(sc, vsc);
770 	if (sc->sc_has_ctrl)
771 		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
772 
773 	/*
774 	 * The only way to stop interrupts, I/O and DMA is to reset the device.
775 	 *
776 	 * NOTE: Devices based on the VirtIO draft specification cannot stop
777 	 * interrupts completely even if virtio_stop_vq_intr() is called.
778 	 */
779 	virtio_reset(vsc);
780 
781 	vioif_intr_barrier();
782 
783 	for (i = 0; i < act_qnum; i++) {
784 		netq = &sc->sc_netqs[i];
785 		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
786 	}
787 
788 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
789 		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
790 		vioif_rx_queue_clear(sc, vsc, netq);
791 
792 		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
793 		vioif_tx_queue_clear(sc, vsc, netq);
794 	}
795 
796 	/* all packet processing is stopped */
797 	for (i = 0; i < act_qnum; i++) {
798 		netq = &sc->sc_netqs[i];
799 
800 		mutex_enter(&netq->netq_lock);
801 		netq->netq_stopping = false;
802 		mutex_exit(&netq->netq_lock);
803 	}
804 }
805 
806 static void
807 vioif_start(struct ifnet *ifp)
808 {
809 	struct vioif_softc *sc = ifp->if_softc;
810 	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
811 
812 #ifdef VIOIF_MPSAFE
813 	KASSERT(if_is_mpsafe(ifp));
814 #endif
815 
816 	mutex_enter(&txq0->netq_lock);
817 	vioif_start_locked(ifp, txq0);
818 	mutex_exit(&txq0->netq_lock);
819 }
820 
821 static inline int
822 vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
823 {
824 	struct vioif_softc *sc = ifp->if_softc;
825 	u_int cpuid = cpu_index(curcpu());
826 
827 	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
828 }
829 
830 static int
831 vioif_transmit(struct ifnet *ifp, struct mbuf *m)
832 {
833 	struct vioif_softc *sc = ifp->if_softc;
834 	struct vioif_netqueue *netq;
835 	struct vioif_tx_context *txc;
836 	int qid;
837 
838 	qid = vioif_select_txqueue(ifp, m);
839 	netq = &sc->sc_netqs[qid];
840 	txc = netq->netq_ctx;
841 
842 	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
843 		m_freem(m);
844 		return ENOBUFS;
845 	}
846 
847 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
848 	if_statadd_ref(ifp, nsr, if_obytes, m->m_pkthdr.len);
849 	if (m->m_flags & M_MCAST)
850 		if_statinc_ref(ifp, nsr, if_omcasts);
851 	IF_STAT_PUTREF(ifp);
852 
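	/*
	 * If the queue lock is busy, the packet stays on txc_intrq and is
	 * picked up later by vioif_deferred_transmit() or the TX handler.
	 */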
853 	if (mutex_tryenter(&netq->netq_lock)) {
854 		vioif_transmit_locked(ifp, netq);
855 		mutex_exit(&netq->netq_lock);
856 	}
857 
858 	return 0;
859 }
860 
861 void
862 vioif_watchdog(struct ifnet *ifp)
863 {
864 	struct vioif_softc *sc = ifp->if_softc;
865 	struct vioif_netqueue *netq;
866 	int i;
867 
868 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
869 		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
870 			log(LOG_DEBUG, "%s: watchdog timed out\n",
871 			    ifp->if_xname);
872 		}
873 
874 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
875 			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
876 
877 			mutex_enter(&netq->netq_lock);
878 			if (!netq->netq_running_handle) {
879 				netq->netq_running_handle = true;
880 				vioif_net_sched_handle(sc, netq);
881 			}
882 			mutex_exit(&netq->netq_lock);
883 		}
884 	}
885 }
886 
887 static int
888 vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889 {
890 	int s, r;
891 
892 	s = splnet();
893 
894 	r = ether_ioctl(ifp, cmd, data);
895 	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
896 		if (ifp->if_flags & IFF_RUNNING) {
897 			r = vioif_rx_filter(ifp->if_softc);
898 		} else {
899 			r = 0;
900 		}
901 	}
902 
903 	splx(s);
904 
905 	return r;
906 }
907 
908 static int
909 vioif_ifflags(struct vioif_softc *sc)
910 {
911 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
912 	bool onoff;
913 	int r;
914 
915 	if (!sc->sc_has_ctrl) {
916 		/* no ctrl vq; always promisc and allmulti */
917 		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
918 		return 0;
919 	}
920 
921 	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
922 	r = vioif_set_allmulti(sc, onoff);
923 	if (r != 0) {
924 		log(LOG_WARNING,
925 		    "%s: couldn't %sable ALLMULTI\n",
926 		    ifp->if_xname, onoff ? "en" : "dis");
927 		if (onoff) {
928 			CLR(ifp->if_flags, IFF_ALLMULTI);
929 		} else {
930 			SET(ifp->if_flags, IFF_ALLMULTI);
931 		}
932 	}
933 
934 	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
935 	r = vioif_set_promisc(sc, onoff);
936 	if (r != 0) {
937 		log(LOG_WARNING,
938 		    "%s: couldn't %sable PROMISC\n",
939 		    ifp->if_xname, onoff ? "en" : "dis");
940 		if (onoff) {
941 			CLR(ifp->if_flags, IFF_PROMISC);
942 		} else {
943 			SET(ifp->if_flags, IFF_PROMISC);
944 		}
945 	}
946 
947 	return 0;
948 }
949 
950 static int
951 vioif_ifflags_cb(struct ethercom *ec)
952 {
953 	struct ifnet *ifp = &ec->ec_if;
954 	struct vioif_softc *sc = ifp->if_softc;
955 
956 	return vioif_ifflags(sc);
957 }
958 
959 static int
960 vioif_setup_sysctl(struct vioif_softc *sc)
961 {
962 	const char *devname;
963 	struct sysctllog **log;
964 	const struct sysctlnode *rnode, *rxnode, *txnode;
965 	int error;
966 
967 	log = &sc->sc_sysctllog;
968 	devname = device_xname(sc->sc_dev);
969 
970 	error = sysctl_createv(log, 0, NULL, &rnode,
971 	    0, CTLTYPE_NODE, devname,
972 	    SYSCTL_DESCR("virtio-net information and settings"),
973 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
974 	if (error)
975 		goto out;
976 
977 	error = sysctl_createv(log, 0, &rnode, NULL,
978 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
979 	    SYSCTL_DESCR("Use workqueue for packet processing"),
980 	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
981 	if (error)
982 		goto out;
983 
984 	error = sysctl_createv(log, 0, &rnode, &rxnode,
985 	    0, CTLTYPE_NODE, "rx",
986 	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
987 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
988 	if (error)
989 		goto out;
990 
991 	error = sysctl_createv(log, 0, &rxnode, NULL,
992 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
993 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
994 	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
995 	if (error)
996 		goto out;
997 
998 	error = sysctl_createv(log, 0, &rxnode, NULL,
999 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1000 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1001 	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1002 	if (error)
1003 		goto out;
1004 
1005 	error = sysctl_createv(log, 0, &rnode, &txnode,
1006 	    0, CTLTYPE_NODE, "tx",
1007 	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1008 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1009 	if (error)
1010 		goto out;
1011 
1012 	error = sysctl_createv(log, 0, &txnode, NULL,
1013 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1014 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1015 	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1016 	if (error)
1017 		goto out;
1018 
1019 	error = sysctl_createv(log, 0, &txnode, NULL,
1020 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1021 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1022 	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1023 
1024 out:
1025 	if (error)
1026 		sysctl_teardown(log);
1027 
1028 	return error;
1029 }
1030 
1031 static void
1032 vioif_setup_stats(struct vioif_softc *sc)
1033 {
1034 	struct vioif_netqueue *netq;
1035 	struct vioif_tx_context *txc;
1036 	struct vioif_rx_context *rxc;
1037 	size_t i, netq_num;
1038 
1039 	netq_num = sc->sc_max_nvq_pairs * 2;
1040 	for (i = 0; i < netq_num; i++) {
1041 		netq = &sc->sc_netqs[i];
1042 		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1043 		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1044 		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1045 		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1046 		    "virtqueue enqueue failed");
1047 
1048 		switch (VIOIF_NETQ_DIR(i)) {
1049 		case VIOIF_NETQ_RX:
1050 			rxc = netq->netq_ctx;
1051 			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1052 			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1053 			    "no receive buffer");
1054 			break;
1055 		case VIOIF_NETQ_TX:
1056 			txc = netq->netq_ctx;
1057 			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1058 			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1059 			    "m_defrag() failed");
1060 			break;
1061 		}
1062 	}
1063 
1064 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1065 	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1066 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1067 	    NULL, device_xname(sc->sc_dev), "control command failed");
1068 }
1069 
1070 /*
1071  * allocate memory
1072  */
1073 static int
1074 vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1075     bus_size_t size, int nsegs, const char *usage)
1076 {
1077 	int r;
1078 
1079 	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1080 	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1081 
1082 	if (r != 0) {
1083 		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1084 		    "error code %d\n", usage, r);
1085 	}
1086 
1087 	return r;
1088 }
1089 
1090 static void
1091 vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1092 {
1093 
1094 	if (*map) {
1095 		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1096 		*map = NULL;
1097 	}
1098 }
1099 
1100 static int
1101 vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1102     void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1103 {
1104 	int r;
1105 
1106 	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1107 	if (r != 0)
1108 		return 1;
1109 
1110 	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1111 	    size, NULL, rw | BUS_DMA_NOWAIT);
1112 	if (r != 0) {
1113 		vioif_dmamap_destroy(sc, map);
1114 		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1115 		    "error code %d\n", usage, r);
1116 	}
1117 
1118 	return r;
1119 }
1120 
1121 static void *
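/*
 * Bump-pointer helper used to carve the single DMA and kmem allocations
 * into per-object regions.  Usage sketch (illustrative only):
 *
 *	p = (intptr_t)vaddr;
 *	ctrlq->ctrlq_cmd = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_cmd));
 *	ctrlq->ctrlq_status = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_status));
 */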
1122 vioif_assign_mem(intptr_t *p, size_t size)
1123 {
1124 	intptr_t rv;
1125 
1126 	rv = *p;
1127 	*p += size;
1128 
1129 	return (void *)rv;
1130 }
1131 
1132 /*
1133  * dma memory is used for:
1134  *   netq_maps_kva:	 metadata array for received frames (READ) and
1135  *			 sent frames (WRITE)
1136  *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1137  *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1138  *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1139  *			 (WRITE)
1140  *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1141  *			 class command (WRITE)
1142  *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1143  *			 class command (WRITE)
1144  * Only one instance of each ctrlq_* structure is allocated; they are
1145  * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
1146  */
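/*
 * The vioif_net_map arrays themselves need not be DMA-safe; they are
 * allocated separately with kmem_zalloc() in the second half of
 * vioif_alloc_mems() and carved up the same way.
 */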
1147 static int
1148 vioif_alloc_mems(struct vioif_softc *sc)
1149 {
1150 	struct virtio_softc *vsc = sc->sc_virtio;
1151 	struct vioif_netqueue *netq;
1152 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1153 	struct vioif_net_map *maps;
1154 	unsigned int vq_num;
1155 	int r, rsegs;
1156 	bus_size_t dmamemsize;
1157 	size_t qid, i, netq_num, kmemsize;
1158 	void *vaddr;
1159 	intptr_t p;
1160 
1161 	netq_num = sc->sc_max_nvq_pairs * 2;
1162 
1163 	/* allocate DMA memory */
1164 	dmamemsize = 0;
1165 
1166 	for (qid = 0; qid < netq_num; qid++) {
1167 		maps = sc->sc_netqs[qid].netq_maps;
1168 		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1169 		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1170 	}
1171 
1172 	if (sc->sc_has_ctrl) {
1173 		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1174 		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1175 		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1176 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1177 		    + ETHER_ADDR_LEN;
1178 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1179 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1180 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1181 		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1182 	}
1183 
1184 	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1185 	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1186 	if (r != 0) {
1187 		aprint_error_dev(sc->sc_dev,
1188 		    "DMA memory allocation failed, size %" PRIuBUSSIZE ", "
1189 		    "error code %d\n", dmamemsize, r);
1190 		goto err_none;
1191 	}
1192 	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1193 	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1194 	if (r != 0) {
1195 		aprint_error_dev(sc->sc_dev,
1196 		    "DMA memory map failed, error code %d\n", r);
1197 		goto err_dmamem_alloc;
1198 	}
1199 
1200 	/* assign DMA memory */
1201 	memset(vaddr, 0, dmamemsize);
1202 	sc->sc_dmamem = vaddr;
1203 	p = (intptr_t) vaddr;
1204 
1205 	for (qid = 0; qid < netq_num; qid++) {
1206 		netq = &sc->sc_netqs[qid];
1207 		maps = netq->netq_maps;
1208 		vq_num = netq->netq_vq->vq_num;
1209 
1210 		netq->netq_maps_kva = vioif_assign_mem(&p,
1211 		    sizeof(*maps[0].vnm_hdr) * vq_num);
1212 	}
1213 
1214 	if (sc->sc_has_ctrl) {
1215 		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1216 		    sizeof(*ctrlq->ctrlq_cmd));
1217 		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1218 		    sizeof(*ctrlq->ctrlq_status));
1219 		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1220 		    sizeof(*ctrlq->ctrlq_rx));
1221 		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1222 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1223 		    + ETHER_ADDR_LEN);
1224 		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1225 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1226 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1227 		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1228 		    sizeof(*ctrlq->ctrlq_mac_addr));
1229 		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1230 	}
1231 
1232 	/* allocate kmem */
1233 	kmemsize = 0;
1234 
1235 	for (qid = 0; qid < netq_num; qid++) {
1236 		netq = &sc->sc_netqs[qid];
1237 		vq_num = netq->netq_vq->vq_num;
1238 
1239 		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1240 	}
1241 
1242 	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1243 	sc->sc_kmem = vaddr;
1244 
1245 	/* assign allocated kmem */
1246 	p = (intptr_t) vaddr;
1247 
1248 	for (qid = 0; qid < netq_num; qid++) {
1249 		netq = &sc->sc_netqs[qid];
1250 		vq_num = netq->netq_vq->vq_num;
1251 
1252 		netq->netq_maps = vioif_assign_mem(&p,
1253 		    sizeof(netq->netq_maps[0]) * vq_num);
1254 	}
1255 
1256 	/* prepare dmamaps */
1257 	for (qid = 0; qid < netq_num; qid++) {
1258 		static const struct {
1259 			const char	*msg_hdr;
1260 			const char	*msg_payload;
1261 			int		 dma_flag;
1262 			bus_size_t	 dma_size;
1263 			int		 dma_nsegs;
1264 		} dmaparams[VIOIF_NETQ_IDX] = {
1265 			[VIOIF_NETQ_RX] = {
1266 				.msg_hdr	= "rx header",
1267 				.msg_payload	= "rx payload",
1268 				.dma_flag	= BUS_DMA_READ,
1269 				.dma_size	= MCLBYTES - ETHER_ALIGN,
1270 				.dma_nsegs	= 1,
1271 			},
1272 			[VIOIF_NETQ_TX] = {
1273 				.msg_hdr	= "tx header",
1274 				.msg_payload	= "tx payload",
1275 				.dma_flag	= BUS_DMA_WRITE,
1276 				.dma_size	= ETHER_MAX_LEN,
1277 				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1278 			}
1279 		};
1280 
1281 		struct virtio_net_hdr *hdrs;
1282 		int dir;
1283 		int nsegs;
1284 
1285 		dir = VIOIF_NETQ_DIR(qid);
1286 		netq = &sc->sc_netqs[qid];
1287 		vq_num = netq->netq_vq->vq_num;
1288 		maps = netq->netq_maps;
1289 		hdrs = netq->netq_maps_kva;
1290 		nsegs = uimin(dmaparams[dir].dma_nsegs, vq_num - 1/*hdr*/);
1291 
1292 		for (i = 0; i < vq_num; i++) {
1293 			maps[i].vnm_hdr = &hdrs[i];
1294 
1295 			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1296 			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1297 			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1298 			if (r != 0)
1299 				goto err_reqs;
1300 
1301 			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1302 			    dmaparams[dir].dma_size, nsegs,
1303 			    dmaparams[dir].msg_payload);
1304 			if (r != 0)
1305 				goto err_reqs;
1306 		}
1307 	}
1308 
1309 	if (sc->sc_has_ctrl) {
1310 		/* control vq class & command */
1311 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1312 		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1313 		    BUS_DMA_WRITE, "control command");
1314 		if (r != 0)
1315 			goto err_reqs;
1316 
1317 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1318 		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1319 		    BUS_DMA_READ, "control status");
1320 		if (r != 0)
1321 			goto err_reqs;
1322 
1323 		/* control vq rx mode command parameter */
1324 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1325 		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1326 		    BUS_DMA_WRITE, "rx mode control command");
1327 		if (r != 0)
1328 			goto err_reqs;
1329 
1330 		/* multiqueue set command */
1331 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1332 		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1333 		    BUS_DMA_WRITE, "multiqueue set command");
1334 		if (r != 0)
1335 			goto err_reqs;
1336 
1337 		/* control vq MAC filter table for unicast */
1338 		/* do not load now since its length is variable */
1339 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1340 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1341 		    + ETHER_ADDR_LEN, 1,
1342 		    "unicast MAC address filter command");
1343 		if (r != 0)
1344 			goto err_reqs;
1345 
1346 		/* control vq MAC filter table for multicast */
1347 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1348 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1349 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1350 		    "multicast MAC address filter command");
1351 		if (r != 0)
1352 			goto err_reqs;
1353 
1354 		/* control vq MAC address set command */
1355 		r = vioif_dmamap_create_load(sc,
1356 		    &ctrlq->ctrlq_mac_addr_dmamap,
1357 		    ctrlq->ctrlq_mac_addr,
1358 		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1359 		    BUS_DMA_WRITE, "mac addr set command");
1360 		if (r != 0)
1361 			goto err_reqs;
1362 	}
1363 
1364 	return 0;
1365 
1366 err_reqs:
1367 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1368 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1369 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1370 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1371 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1372 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1373 	for (qid = 0; qid < netq_num; qid++) {
1374 		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1375 		maps = sc->sc_netqs[qid].netq_maps;
1376 
1377 		for (i = 0; i < vq_num; i++) {
1378 			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1379 			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1380 		}
1381 	}
1382 	if (sc->sc_kmem) {
1383 		kmem_free(sc->sc_kmem, kmemsize);
1384 		sc->sc_kmem = NULL;
1385 	}
1386 	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1387 err_dmamem_alloc:
1388 	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1389 err_none:
1390 	return -1;
1391 }
1392 
1393 static void
1394 vioif_alloc_queues(struct vioif_softc *sc)
1395 {
1396 	int nvq_pairs = sc->sc_max_nvq_pairs;
1397 	size_t nvqs, netq_num;
1398 
1399 	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1400 
1401 	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1402 	if (sc->sc_has_ctrl)
1403 		nvqs++;
1404 
1405 	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1406 	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1407 	    KM_SLEEP);
1408 }
1409 
1410 static void
1411 vioif_free_queues(struct vioif_softc *sc)
1412 {
1413 	size_t nvqs, netq_num;
1414 
1415 	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1416 	if (sc->sc_ctrlq.ctrlq_vq)
1417 		nvqs++;
1418 
1419 	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1420 	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1421 	sc->sc_netqs = NULL;
1422 	sc->sc_vqs = NULL;
1423 }
1424 
1425 /*
1426  * Network queues
1427  */
1428 static int
1429 vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1430     size_t qid, u_int softint_flags)
1431 {
1432 	static const struct {
1433 		const char	*dirname;
1434 		int		 segsize;
1435 		int		 nsegs;
1436 		int 		(*intrhand)(void *);
1437 		void		(*sihand)(void *);
1438 	} params[VIOIF_NETQ_IDX] = {
1439 		[VIOIF_NETQ_RX] = {
1440 			.dirname	= "rx",
1441 			.segsize	= MCLBYTES,
1442 			.nsegs		= 2,
1443 			.intrhand	= vioif_rx_intr,
1444 			.sihand		= vioif_rx_handle,
1445 		},
1446 		[VIOIF_NETQ_TX] = {
1447 			.dirname	= "tx",
1448 			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1449 			.nsegs		= 2,
1450 			.intrhand	= vioif_tx_intr,
1451 			.sihand		= vioif_tx_handle,
1452 		}
1453 	};
1454 
1455 	struct virtqueue *vq;
1456 	struct vioif_netqueue *netq;
1457 	struct vioif_tx_context *txc;
1458 	struct vioif_rx_context *rxc;
1459 	char qname[32];
1460 	int r, dir;
1461 
1462 	txc = NULL;
1463 	rxc = NULL;
1464 	netq = &sc->sc_netqs[qid];
1465 	vq = &sc->sc_vqs[qid];
1466 	dir = VIOIF_NETQ_DIR(qid);
1467 
1468 	netq->netq_vq = &sc->sc_vqs[qid];
1469 	netq->netq_stopping = false;
1470 	netq->netq_running_handle = false;
1471 
1472 	snprintf(qname, sizeof(qname), "%s%zu",
1473 	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1474 	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1475 	    "%s-%s", device_xname(sc->sc_dev), qname);
1476 
1477 	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1478 	virtio_init_vq(vsc, vq, qid, params[dir].intrhand, netq);
1479 
1480 	r = virtio_alloc_vq(vsc, vq,
1481 	    params[dir].segsize + sc->sc_hdr_size,
1482 	    params[dir].nsegs, qname);
1483 	if (r != 0)
1484 		goto err;
1485 	netq->netq_vq = vq;
1486 
1487 	netq->netq_softint = softint_establish(softint_flags,
1488 	    params[dir].sihand, netq);
1489 	if (netq->netq_softint == NULL) {
1490 		aprint_error_dev(sc->sc_dev,
1491 		    "couldn't establish %s softint\n",
1492 		    params[dir].dirname);
1493 		goto err;
1494 	}
1495 	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1496 
1497 	switch (dir) {
1498 	case VIOIF_NETQ_RX:
1499 		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1500 		netq->netq_ctx = rxc;
1501 		/* nothing to do */
1502 		break;
1503 	case VIOIF_NETQ_TX:
1504 		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1505 		netq->netq_ctx = (void *)txc;
1506 		txc->txc_deferred_transmit = softint_establish(softint_flags,
1507 		    vioif_deferred_transmit, netq);
1508 		if (txc->txc_deferred_transmit == NULL) {
1509 			aprint_error_dev(sc->sc_dev,
1510 			    "couldn't establish softint for "
1511 			    "tx deferred transmit\n");
1512 			goto err;
1513 		}
1514 		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1515 		txc->txc_no_free_slots = false;
1516 		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1517 		break;
1518 	}
1519 
1520 	return 0;
1521 
1522 err:
1523 	netq->netq_ctx = NULL;
1524 
1525 	if (rxc != NULL) {
1526 		kmem_free(rxc, sizeof(*rxc));
1527 	}
1528 
1529 	if (txc != NULL) {
1530 		if (txc->txc_deferred_transmit != NULL)
1531 			softint_disestablish(txc->txc_deferred_transmit);
1532 		if (txc->txc_intrq != NULL)
1533 			pcq_destroy(txc->txc_intrq);
1534 		kmem_free(txc, sizeof(*txc));
1535 	}
1536 
1537 	vioif_work_set(&netq->netq_work, NULL, NULL);
1538 	if (netq->netq_softint != NULL) {
1539 		softint_disestablish(netq->netq_softint);
1540 		netq->netq_softint = NULL;
1541 	}
1542 
1543 	virtio_free_vq(vsc, vq);
1544 	mutex_destroy(&netq->netq_lock);
1545 	netq->netq_vq = NULL;
1546 
1547 	return -1;
1548 }
1549 
1550 static void
1551 vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1552     size_t qid)
1553 {
1554 	struct vioif_netqueue *netq;
1555 	struct vioif_rx_context *rxc;
1556 	struct vioif_tx_context *txc;
1557 	int dir;
1558 
1559 	netq = &sc->sc_netqs[qid];
1560 
1561 	if (netq->netq_vq == NULL)
1562 		return;
1563 
1564 	netq = &sc->sc_netqs[qid];
1565 	dir = VIOIF_NETQ_DIR(qid);
1566 	switch (dir) {
1567 	case VIOIF_NETQ_RX:
1568 		rxc = netq->netq_ctx;
1569 		netq->netq_ctx = NULL;
1570 		kmem_free(rxc, sizeof(*rxc));
1571 		break;
1572 	case VIOIF_NETQ_TX:
1573 		txc = netq->netq_ctx;
1574 		netq->netq_ctx = NULL;
1575 		softint_disestablish(txc->txc_deferred_transmit);
1576 		pcq_destroy(txc->txc_intrq);
1577 		kmem_free(txc, sizeof(*txc));
1578 		break;
1579 	}
1580 
1581 	softint_disestablish(netq->netq_softint);
1582 	virtio_free_vq(vsc, netq->netq_vq);
1583 	mutex_destroy(&netq->netq_lock);
1584 	netq->netq_vq = NULL;
1585 }
1586 
1587 static void
1588 vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1589 {
1590 
1591 	KASSERT(mutex_owned(&netq->netq_lock));
1592 	KASSERT(!netq->netq_stopping);
1593 
1594 	if (netq->netq_workqueue) {
1595 		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1596 	} else {
1597 		softint_schedule(netq->netq_softint);
1598 	}
1599 }
1600 
1601 static int
1602 vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1603    struct mbuf *m, int dma_flags)
1604 {
1605 	int r;
1606 
1607 	KASSERT(map->vnm_mbuf == NULL);
1608 
1609 	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1610 	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1611 	if (r == 0) {
1612 		map->vnm_mbuf = m;
1613 	}
1614 
1615 	return r;
1616 }
1617 
1618 static void
1619 vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1620 {
1621 
1622 	KASSERT(map->vnm_mbuf != NULL);
1623 	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1624 	map->vnm_mbuf = NULL;
1625 }
1626 
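/*
 * Enqueue one frame as a descriptor chain: the virtio-net header dmamap
 * first, then the mbuf dmamap, hence the dm_nsegs + 1 reservation below.
 */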
1627 static int
1628 vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1629     int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1630 {
1631 	int r;
1632 
1633 	KASSERT(map->vnm_mbuf != NULL);
1634 
1635 	/* This should actually never fail */
1636 	r = virtio_enqueue_reserve(vsc, vq, slot,
1637 	    map->vnm_mbuf_map->dm_nsegs + 1);
1638 	if (r != 0) {
1639 		/* slot already freed by virtio_enqueue_reserve */
1640 		return r;
1641 	}
1642 
1643 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1644 	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1645 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1646 	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1647 
1648 	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1649 	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1650 	virtio_enqueue_commit(vsc, vq, slot, false);
1651 
1652 	return 0;
1653 }
1654 
1655 static int
1656 vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1657     int slot, struct vioif_net_map *map)
1658 {
1659 
1660 	return vioif_net_enqueue(vsc, vq, slot, map,
1661 	    BUS_DMASYNC_PREWRITE, true);
1662 }
1663 
1664 static int
1665 vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1666     int slot, struct vioif_net_map *map)
1667 {
1668 
1669 	return vioif_net_enqueue(vsc, vq, slot, map,
1670 	    BUS_DMASYNC_PREREAD, false);
1671 }
1672 
1673 static struct mbuf *
1674 vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1675    int slot, struct vioif_net_map *map, int dma_flags)
1676 {
1677 	struct mbuf *m;
1678 
1679 	m = map->vnm_mbuf;
1680 	KASSERT(m != NULL);
1681 	map->vnm_mbuf = NULL;
1682 
1683 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1684 	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1685 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1686 	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1687 
1688 	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1689 	virtio_dequeue_commit(vsc, vq, slot);
1690 
1691 	return m;
1692 }
1693 
1694 static void
1695 vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1696 {
1697 	struct vioif_netqueue *netq;
1698 	size_t i, act_qnum;
1699 	int enqueued;
1700 
1701 	act_qnum = sc->sc_act_nvq_pairs * 2;
1702 	for (i = 0; i < act_qnum; i++) {
1703 		netq = &sc->sc_netqs[i];
1704 
1705 		KASSERT(!netq->netq_stopping);
1706 		KASSERT(!netq->netq_running_handle);
1707 
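		/*
		 * If buffers were already used while the interrupt was
		 * off, keep it masked and hand the work to the
		 * softint/workqueue handler instead.
		 */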
1708 		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1709 		if (enqueued != 0) {
1710 			virtio_stop_vq_intr(vsc, netq->netq_vq);
1711 
1712 			mutex_enter(&netq->netq_lock);
1713 			netq->netq_running_handle = true;
1714 			vioif_net_sched_handle(sc, netq);
1715 			mutex_exit(&netq->netq_lock);
1716 		}
1717 	}
1718 }
1719 
1720 static void
1721 vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1722 {
1723 	struct vioif_netqueue *netq;
1724 	size_t i, act_qnum;
1725 
1726 	act_qnum = sc->sc_act_nvq_pairs * 2;
1727 	for (i = 0; i < act_qnum; i++) {
1728 		netq = &sc->sc_netqs[i];
1729 
1730 		virtio_stop_vq_intr(vsc, netq->netq_vq);
1731 	}
1732 }
1733 
1734 /*
1735  * Receive implementation
1736  */
1737 /* enqueue mbufs to receive slots */
1738 static void
1739 vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1740 {
1741 	struct virtqueue *vq = netq->netq_vq;
1742 	struct virtio_softc *vsc = vq->vq_owner;
1743 	struct vioif_rx_context *rxc;
1744 	struct vioif_net_map *map;
1745 	struct mbuf *m;
1746 	int i, r, ndone = 0;
1747 
1748 	KASSERT(mutex_owned(&netq->netq_lock));
1749 
1750 	rxc = netq->netq_ctx;
1751 
1752 	for (i = 0; i < vq->vq_num; i++) {
1753 		int slot;
1754 		r = virtio_enqueue_prep(vsc, vq, &slot);
1755 		if (r == EAGAIN)
1756 			break;
1757 		if (__predict_false(r != 0))
1758 			panic("enqueue_prep for rx buffers");
1759 
1760 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1761 		if (m == NULL) {
1762 			virtio_enqueue_abort(vsc, vq, slot);
1763 			rxc->rxc_mbuf_enobufs.ev_count++;
1764 			break;
1765 		}
1766 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1767 		MCLGET(m, M_DONTWAIT);
1768 		if ((m->m_flags & M_EXT) == 0) {
1769 			virtio_enqueue_abort(vsc, vq, slot);
1770 			m_freem(m);
1771 			rxc->rxc_mbuf_enobufs.ev_count++;
1772 			break;
1773 		}
1774 
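		/*
		 * Use a full cluster, then trim ETHER_ALIGN bytes from the
		 * head so the IP header following the Ethernet header is
		 * 4-byte aligned.
		 */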
1775 		m->m_len = m->m_pkthdr.len = MCLBYTES;
1776 		m_adj(m, ETHER_ALIGN);
1777 
1778 		map = &netq->netq_maps[slot];
1779 		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1780 		if (r != 0) {
1781 			virtio_enqueue_abort(vsc, vq, slot);
1782 			m_freem(m);
1783 			netq->netq_mbuf_load_failed.ev_count++;
1784 			break;
1785 		}
1786 
1787 		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1788 		if (r != 0) {
1789 			vioif_net_unload_mbuf(vsc, map);
1790 			netq->netq_enqueue_failed.ev_count++;
1791 			m_freem(m);
1792 			/* slot already freed by vioif_net_enqueue_rx */
1793 			break;
1794 		}
1795 
1796 		ndone++;
1797 	}
1798 
1799 	if (ndone > 0)
1800 		vioif_notify(vsc, vq);
1801 }
1802 
1803 /* dequeue received packets */
1804 static bool
1805 vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1806     struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1807 {
1808 	struct virtqueue *vq = netq->netq_vq;
1809 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1810 	struct vioif_net_map *map;
1811 	struct mbuf *m;
1812 	int slot, len;
1813 	bool more;
1814 	size_t ndeq;
1815 
1816 	KASSERT(mutex_owned(&netq->netq_lock));
1817 
1818 	more = false;
1819 	ndeq = 0;
1820 
1821 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1822 		goto done;
1823 
1824 	for (;;ndeq++) {
1825 		if (ndeq >= limit) {
1826 			more = true;
1827 			break;
1828 		}
1829 
1830 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1831 			break;
1832 
1833 		map = &netq->netq_maps[slot];
1834 		KASSERT(map->vnm_mbuf != NULL);
1835 		m = vioif_net_dequeue_commit(vsc, vq, slot,
1836 		    map, BUS_DMASYNC_POSTREAD);
1837 		KASSERT(m != NULL);
1838 
1839 		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1840 		m_set_rcvif(m, ifp);
1841 		if_percpuq_enqueue(ifp->if_percpuq, m);
1842 	}
1843 
1844 done:
1845 	if (ndeqp != NULL)
1846 		*ndeqp = ndeq;
1847 
1848 	return more;
1849 }
1850 
1851 static void
1852 vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1853     struct vioif_netqueue *netq)
1854 {
1855 	struct vioif_net_map *map;
1856 	struct mbuf *m;
1857 	unsigned int i, vq_num;
1858 	bool more;
1859 
1860 	mutex_enter(&netq->netq_lock);
1861 
1862 	vq_num = netq->netq_vq->vq_num;
1863 	for (;;) {
1864 		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1865 		if (more == false)
1866 			break;
1867 	}
1868 
1869 	for (i = 0; i < vq_num; i++) {
1870 		map = &netq->netq_maps[i];
1871 
1872 		m = map->vnm_mbuf;
1873 		if (m == NULL)
1874 			continue;
1875 
1876 		vioif_net_unload_mbuf(vsc, map);
1877 		m_freem(m);
1878 	}
1879 	mutex_exit(&netq->netq_lock);
1880 }
1881 
1882 static void
1883 vioif_rx_handle_locked(void *xnetq, u_int limit)
1884 {
1885 	struct vioif_netqueue *netq = xnetq;
1886 	struct virtqueue *vq = netq->netq_vq;
1887 	struct virtio_softc *vsc = vq->vq_owner;
1888 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1889 	bool more;
1890 	int enqueued;
1891 	size_t ndeq;
1892 
1893 	KASSERT(mutex_owned(&netq->netq_lock));
1894 	KASSERT(!netq->netq_stopping);
1895 
1896 	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1897 	if (ndeq > 0)
1898 		vioif_populate_rx_mbufs_locked(sc, netq);
1899 
1900 	if (more) {
1901 		vioif_net_sched_handle(sc, netq);
1902 		return;
1903 	}
1904 
1905 	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1906 	if (enqueued != 0) {
1907 		virtio_stop_vq_intr(vsc, netq->netq_vq);
1908 		vioif_net_sched_handle(sc, netq);
1909 		return;
1910 	}
1911 
1912 	netq->netq_running_handle = false;
1913 }
1914 
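/* rx vq interrupt: disable further interrupts and process with the intr limit */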
1915 static int
1916 vioif_rx_intr(void *arg)
1917 {
1918 	struct vioif_netqueue *netq = arg;
1919 	struct virtqueue *vq = netq->netq_vq;
1920 	struct virtio_softc *vsc = vq->vq_owner;
1921 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1922 	u_int limit;
1923 
1924 	mutex_enter(&netq->netq_lock);
1925 
1926 	/* handler is already running in softint/workqueue */
1927 	if (netq->netq_running_handle)
1928 		goto done;
1929 
1930 	if (netq->netq_stopping)
1931 		goto done;
1932 
1933 	netq->netq_running_handle = true;
1934 
1935 	limit = sc->sc_rx_intr_process_limit;
1936 	virtio_stop_vq_intr(vsc, vq);
1937 	vioif_rx_handle_locked(netq, limit);
1938 
1939 done:
1940 	mutex_exit(&netq->netq_lock);
1941 	return 1;
1942 }
1943 
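/* deferred rx handler, run from softint or workqueue when more work was pending */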
1944 static void
1945 vioif_rx_handle(void *xnetq)
1946 {
1947 	struct vioif_netqueue *netq = xnetq;
1948 	struct virtqueue *vq = netq->netq_vq;
1949 	struct virtio_softc *vsc = vq->vq_owner;
1950 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1951 	u_int limit;
1952 
1953 	mutex_enter(&netq->netq_lock);
1954 
1955 	KASSERT(netq->netq_running_handle);
1956 
1957 	if (netq->netq_stopping) {
1958 		netq->netq_running_handle = false;
1959 		goto done;
1960 	}
1961 
1962 	limit = sc->sc_rx_process_limit;
1963 	vioif_rx_handle_locked(netq, limit);
1964 
1965 done:
1966 	mutex_exit(&netq->netq_lock);
1967 }
1968 
1969 /*
1970  * Transmission implementation
1971  */
1972 /* enqueue mbufs to send */
1973 static void
1974 vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1975     bool is_transmit)
1976 {
1977 	struct vioif_softc *sc = ifp->if_softc;
1978 	struct virtio_softc *vsc = sc->sc_virtio;
1979 	struct virtqueue *vq = netq->netq_vq;
1980 	struct vioif_tx_context *txc;
1981 	struct vioif_net_map *map;
1982 	struct mbuf *m;
1983 	int queued = 0;
1984 
1985 	KASSERT(mutex_owned(&netq->netq_lock));
1986 
1987 	if (netq->netq_stopping ||
1988 	    !ISSET(ifp->if_flags, IFF_RUNNING))
1989 		return;
1990 
1991 	txc = netq->netq_ctx;
1992 
1993 	if (!txc->txc_link_active ||
1994 	    txc->txc_no_free_slots)
1995 		return;
1996 
1997 	for (;;) {
1998 		int slot, r;
1999 		r = virtio_enqueue_prep(vsc, vq, &slot);
2000 		if (r == EAGAIN) {
2001 			txc->txc_no_free_slots = true;
2002 			break;
2003 		}
2004 		if (__predict_false(r != 0))
2005 			panic("enqueue_prep for tx buffers");
2006 
2007 		if (is_transmit)
2008 			m = pcq_get(txc->txc_intrq);
2009 		else
2010 			IFQ_DEQUEUE(&ifp->if_snd, m);
2011 
2012 		if (m == NULL) {
2013 			virtio_enqueue_abort(vsc, vq, slot);
2014 			break;
2015 		}
2016 
2017 		map = &netq->netq_maps[slot];
2018 		KASSERT(map->vnm_mbuf == NULL);
2019 
2020 		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2021 		if (r != 0) {
2022 			/* maybe just too fragmented */
2023 			struct mbuf *newm;
2024 
2025 			newm = m_defrag(m, M_NOWAIT);
2026 			if (newm != NULL) {
2027 				m = newm;
2028 				r = vioif_net_load_mbuf(vsc, map, m,
2029 				    BUS_DMA_WRITE);
2030 			} else {
2031 				txc->txc_defrag_failed.ev_count++;
2032 				r = -1;
2033 			}
2034 
2035 			if (r != 0) {
2036 				netq->netq_mbuf_load_failed.ev_count++;
2037 				m_freem(m);
2038 				if_statinc(ifp, if_oerrors);
2039 				virtio_enqueue_abort(vsc, vq, slot);
2040 				continue;
2041 			}
2042 		}
2043 
2044 		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2045 
2046 		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2047 		if (r != 0) {
2048 			netq->netq_enqueue_failed.ev_count++;
2049 			vioif_net_unload_mbuf(vsc, map);
2050 			m_freem(m);
2051 			/* slot already freed by vioif_net_enqueue_tx */
2052 
2053 			if_statinc(ifp, if_oerrors);
2054 			continue;
2055 		}
2056 
2057 		queued++;
2058 		bpf_mtap(ifp, m, BPF_D_OUT);
2059 	}
2060 
2061 	if (queued > 0) {
2062 		vioif_notify(vsc, vq);
2063 		ifp->if_timer = 5;
2064 	}
2065 }
2066 
2067 /* dequeue sent mbufs */
2068 static bool
2069 vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2070     struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2071 {
2072 	struct virtqueue *vq = netq->netq_vq;
2073 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2074 	struct vioif_net_map *map;
2075 	struct mbuf *m;
2076 	int slot, len;
2077 	bool more;
2078 	size_t ndeq;
2079 
2080 	KASSERT(mutex_owned(&netq->netq_lock));
2081 
2082 	more = false;
2083 	ndeq = 0;
2084 
2085 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2086 		goto done;
2087 
2088 	for (;;ndeq++) {
2089 		if (limit-- == 0) {
2090 			more = true;
2091 			break;
2092 		}
2093 
2094 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2095 			break;
2096 
2097 		map = &netq->netq_maps[slot];
2098 		KASSERT(map->vnm_mbuf != NULL);
2099 		m = vioif_net_dequeue_commit(vsc, vq, slot,
2100 		    map, BUS_DMASYNC_POSTWRITE);
2101 		KASSERT(m != NULL);
2102 
2103 		if_statinc(ifp, if_opackets);
2104 		m_freem(m);
2105 	}
2106 
2107 done:
2108 	if (ndeqp != NULL)
2109 		*ndeqp = ndeq;
2110 	return more;
2111 }
2112 
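/* reclaim the tx virtqueue and free every mbuf still held in its slot maps */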
2113 static void
2114 vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2115     struct vioif_netqueue *netq)
2116 {
2117 	struct vioif_tx_context *txc;
2118 	struct vioif_net_map *map;
2119 	struct mbuf *m;
2120 	unsigned int i, vq_num;
2121 	bool more;
2122 
2123 	mutex_enter(&netq->netq_lock);
2124 
2125 	txc = netq->netq_ctx;
2126 	vq_num = netq->netq_vq->vq_num;
2127 
2128 	for (;;) {
2129 		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2130 		if (more == false)
2131 			break;
2132 	}
2133 
2134 	for (i = 0; i < vq_num; i++) {
2135 		map = &netq->netq_maps[i];
2136 
2137 		m = map->vnm_mbuf;
2138 		if (m == NULL)
2139 			continue;
2140 
2141 		vioif_net_unload_mbuf(vsc, map);
2142 		m_freem(m);
2143 	}
2144 
2145 	txc->txc_no_free_slots = false;
2146 
2147 	mutex_exit(&netq->netq_lock);
2148 }
2149 
2150 static void
2151 vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2152 {
2153 
2154 	/*
2155 	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2156 	 */
2157 	vioif_send_common_locked(ifp, netq, false);
2159 }
2160 
2161 static void
2162 vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2163 {
2164 
2165 	vioif_send_common_locked(ifp, netq, true);
2166 }
2167 
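/* softint: retry sending packets queued on txc_intrq after tx slots have been reclaimed */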
2168 static void
2169 vioif_deferred_transmit(void *arg)
2170 {
2171 	struct vioif_netqueue *netq = arg;
2172 	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2173 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2174 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2175 
2176 	mutex_enter(&netq->netq_lock);
2177 	vioif_send_common_locked(ifp, netq, true);
2178 	mutex_exit(&netq->netq_lock);
2179 }
2180 
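/*
 * Reclaim up to limit transmitted mbufs.  If slots were freed while the
 * ring was marked full, kick the deferred transmit softint.  Then either
 * reschedule (more work pending) or re-enable/postpone the vq interrupt.
 */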
2181 static void
2182 vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2183 {
2184 	struct virtqueue *vq = netq->netq_vq;
2185 	struct vioif_tx_context *txc = netq->netq_ctx;
2186 	struct virtio_softc *vsc = vq->vq_owner;
2187 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2188 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2189 	bool more;
2190 	int enqueued;
2191 	size_t ndeq;
2192 
2193 	KASSERT(mutex_owned(&netq->netq_lock));
2194 	KASSERT(!netq->netq_stopping);
2195 
2196 	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2197 	if (txc->txc_no_free_slots && ndeq > 0) {
2198 		txc->txc_no_free_slots = false;
2199 		softint_schedule(txc->txc_deferred_transmit);
2200 	}
2201 
2202 	if (more) {
2203 		vioif_net_sched_handle(sc, netq);
2204 		return;
2205 	}
2206 
2207 	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2208 	    virtio_postpone_intr_smart(vsc, vq) :
2209 	    virtio_start_vq_intr(vsc, vq);
2210 	if (enqueued != 0) {
2211 		virtio_stop_vq_intr(vsc, vq);
2212 		vioif_net_sched_handle(sc, netq);
2213 		return;
2214 	}
2215 
2216 	netq->netq_running_handle = false;
2217 
2218 	/* for ALTQ */
2219 	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2220 		if_schedule_deferred_start(ifp);
2221 
2222 	softint_schedule(txc->txc_deferred_transmit);
2223 }
2224 
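/* tx vq interrupt: disable further interrupts and reclaim with the intr limit */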
2225 static int
2226 vioif_tx_intr(void *arg)
2227 {
2228 	struct vioif_netqueue *netq = arg;
2229 	struct virtqueue *vq = netq->netq_vq;
2230 	struct virtio_softc *vsc = vq->vq_owner;
2231 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2232 	u_int limit;
2233 
2234 	mutex_enter(&netq->netq_lock);
2235 
2236 	/* tx handler is already running in softint/workqueue */
2237 	if (netq->netq_running_handle)
2238 		goto done;
2239 
2240 	if (netq->netq_stopping)
2241 		goto done;
2242 
2243 	netq->netq_running_handle = true;
2244 
2245 	virtio_stop_vq_intr(vsc, vq);
2246 	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2247 	limit = sc->sc_tx_intr_process_limit;
2248 	vioif_tx_handle_locked(netq, limit);
2249 
2250 done:
2251 	mutex_exit(&netq->netq_lock);
2252 	return 1;
2253 }
2254 
2255 static void
2256 vioif_tx_handle(void *xnetq)
2257 {
2258 	struct vioif_netqueue *netq = xnetq;
2259 	struct virtqueue *vq = netq->netq_vq;
2260 	struct virtio_softc *vsc = vq->vq_owner;
2261 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2262 	u_int limit;
2263 
2264 	mutex_enter(&netq->netq_lock);
2265 
2266 	KASSERT(netq->netq_running_handle);
2267 
2268 	if (netq->netq_stopping) {
2269 		netq->netq_running_handle = false;
2270 		goto done;
2271 	}
2272 
2273 	limit = sc->sc_tx_process_limit;
2274 	vioif_tx_handle_locked(netq, limit);
2275 
2276 done:
2277 	mutex_exit(&netq->netq_lock);
2278 }
2279 
2280 /*
2281  * Control vq
2282  */
2283 /* acquire exclusive use of the control virtqueue; command issuers serialize here */
2284 static void
2285 vioif_ctrl_acquire(struct vioif_softc *sc)
2286 {
2287 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2288 
2289 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2290 	while (ctrlq->ctrlq_inuse != FREE)
2291 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2292 	ctrlq->ctrlq_inuse = INUSE;
2293 	ctrlq->ctrlq_owner = curlwp;
2294 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2295 }
2296 
2297 static void
2298 vioif_ctrl_release(struct vioif_softc *sc)
2299 {
2300 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2301 
2302 	KASSERT(ctrlq->ctrlq_inuse != FREE);
2303 	KASSERT(ctrlq->ctrlq_owner == curlwp);
2304 
2305 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2306 	ctrlq->ctrlq_inuse = FREE;
2307 	ctrlq->ctrlq_owner = NULL;
2308 	cv_signal(&ctrlq->ctrlq_wait);
2309 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2310 }
2311 
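/* DMA-load the buffers of a control command; on failure unload those already loaded */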
2312 static int
2313 vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2314     struct vioif_ctrl_cmdspec *specs, int nspecs)
2315 {
2316 	struct virtio_softc *vsc = sc->sc_virtio;
2317 	int i, r, loaded;
2318 
2319 	loaded = 0;
2320 	for (i = 0; i < nspecs; i++) {
2321 		r = bus_dmamap_load(virtio_dmat(vsc),
2322 		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2323 		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2324 		if (r) {
2325 			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2326 			goto err;
2327 		}
2328 		loaded++;
2329 
2330 	}
2331 
2332 	return 0;
2333 
2334 err:
2335 	for (i = 0; i < loaded; i++) {
2336 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2337 	}
2338 
2339 	return r;
2340 }
2341 
2342 static void
2343 vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2344     struct vioif_ctrl_cmdspec *specs, int nspecs)
2345 {
2346 	struct virtio_softc *vsc = sc->sc_virtio;
2347 	int i;
2348 
2349 	for (i = 0; i < nspecs; i++) {
2350 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2351 	}
2352 }
2353 
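/*
 * Enqueue the command header, the caller's buffers and the status buffer
 * on the control vq, then sleep until vioif_ctrl_intr marks the command
 * DONE and check the device's acknowledgement.
 */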
2354 static int
2355 vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2356     struct vioif_ctrl_cmdspec *specs, int nspecs)
2357 {
2358 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2359 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2360 	struct virtio_softc *vsc = sc->sc_virtio;
2361 	int i, r, slot;
2362 
2363 	ctrlq->ctrlq_cmd->class = class;
2364 	ctrlq->ctrlq_cmd->command = cmd;
2365 
2366 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2367 	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2368 	for (i = 0; i < nspecs; i++) {
2369 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2370 		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2371 	}
2372 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2373 	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2374 
2375 	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2376 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2377 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2378 
2379 	r = virtio_enqueue_prep(vsc, vq, &slot);
2380 	if (r != 0)
2381 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2382 	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2383 	if (r != 0)
2384 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2385 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2386 	for (i = 0; i < nspecs; i++) {
2387 		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2388 	}
2389 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2390 	virtio_enqueue_commit(vsc, vq, slot, true);
2391 
2392 	/* wait for done */
2393 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2394 	while (ctrlq->ctrlq_inuse != DONE)
2395 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2396 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2397 	/* already dequeued */
2398 
2399 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2400 	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2401 	for (i = 0; i < nspecs; i++) {
2402 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2403 		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2404 	}
2405 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2406 	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2407 
2408 	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2409 		r = 0;
2410 	else {
2411 		device_printf(sc->sc_dev, "control command failed\n");
2412 		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2413 		r = EIO;
2414 	}
2415 
2416 	return r;
2417 }
2418 
2419 /* ctrl vq interrupt; wake up the command issuer */
2420 static int
2421 vioif_ctrl_intr(void *arg)
2422 {
2423 	struct vioif_ctrlqueue *ctrlq = arg;
2424 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2425 	struct virtio_softc *vsc = vq->vq_owner;
2426 	int r, slot;
2427 
2428 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2429 		return 0;
2430 
2431 	r = virtio_dequeue(vsc, vq, &slot, NULL);
2432 	if (r == ENOENT)
2433 		return 0;
2434 	virtio_dequeue_commit(vsc, vq, slot);
2435 
2436 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2437 	ctrlq->ctrlq_inuse = DONE;
2438 	cv_signal(&ctrlq->ctrlq_wait);
2439 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2440 
2441 	return 1;
2442 }
2443 
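/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */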
2444 static int
2445 vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2446 {
2447 	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2448 	struct vioif_ctrl_cmdspec specs[1];
2449 	int r;
2450 
2451 	if (!sc->sc_has_ctrl)
2452 		return ENOTSUP;
2453 
2454 	vioif_ctrl_acquire(sc);
2455 
2456 	rx->onoff = onoff;
2457 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2458 	specs[0].buf = rx;
2459 	specs[0].bufsize = sizeof(*rx);
2460 
2461 	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2462 	    specs, __arraycount(specs));
2463 
2464 	vioif_ctrl_release(sc);
2465 	return r;
2466 }
2467 
2468 static int
2469 vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2470 {
2471 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2472 }
2473 
2474 static int
2475 vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2476 {
2477 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2478 }
2479 
2480 static int
2481 vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2482 {
2483 	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2484 	struct vioif_ctrl_cmdspec specs[1];
2485 	int r;
2486 
2487 	if (!sc->sc_has_ctrl)
2488 		return ENOTSUP;
2489 
2490 	if (nvq_pairs <= 1)
2491 		return EINVAL;
2492 
2493 	vioif_ctrl_acquire(sc);
2494 
2495 	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2496 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2497 	specs[0].buf = mq;
2498 	specs[0].bufsize = sizeof(*mq);
2499 
2500 	r = vioif_ctrl_send_command(sc,
2501 	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2502 	    specs, __arraycount(specs));
2503 
2504 	vioif_ctrl_release(sc);
2505 
2506 	return r;
2507 }
2508 
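/*
 * Push the interface MAC address to the device: via the control vq if
 * VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, otherwise by writing the
 * config space directly.
 */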
2509 static int
2510 vioif_set_mac_addr(struct vioif_softc *sc)
2511 {
2512 	struct virtio_net_ctrl_mac_addr *ma =
2513 	    sc->sc_ctrlq.ctrlq_mac_addr;
2514 	struct vioif_ctrl_cmdspec specs[1];
2515 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2516 	int nspecs = __arraycount(specs);
2517 	uint64_t features;
2518 	int r;
2519 	size_t i;
2520 
2521 	if (!sc->sc_has_ctrl)
2522 		return ENOTSUP;
2523 
2524 	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2525 	    ETHER_ADDR_LEN) == 0) {
2526 		return 0;
2527 	}
2528 
2529 	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2530 
2531 	features = virtio_features(sc->sc_virtio);
2532 	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2533 		vioif_ctrl_acquire(sc);
2534 
2535 		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2536 		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2537 		specs[0].buf = ma;
2538 		specs[0].bufsize = sizeof(*ma);
2539 
2540 		r = vioif_ctrl_send_command(sc,
2541 		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2542 		    specs, nspecs);
2543 
2544 		vioif_ctrl_release(sc);
2545 	} else {
2546 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2547 			virtio_write_device_config_1(sc->sc_virtio,
2548 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2549 		}
2550 		r = 0;
2551 	}
2552 
2553 	return r;
2554 }
2555 
2556 static int
2557 vioif_set_rx_filter(struct vioif_softc *sc)
2558 {
2559 	/* filter already set in ctrlq->ctrlq_mac_tbl */
2560 	struct virtio_softc *vsc = sc->sc_virtio;
2561 	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2562 	struct vioif_ctrl_cmdspec specs[2];
2563 	int nspecs = __arraycount(specs);
2564 	int r;
2565 
2566 	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2567 	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2568 
2569 	if (!sc->sc_has_ctrl)
2570 		return ENOTSUP;
2571 
2572 	vioif_ctrl_acquire(sc);
2573 
2574 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2575 	specs[0].buf = mac_tbl_uc;
2576 	specs[0].bufsize = sizeof(*mac_tbl_uc)
2577 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2578 
2579 	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2580 	specs[1].buf = mac_tbl_mc;
2581 	specs[1].bufsize = sizeof(*mac_tbl_mc)
2582 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2583 
2584 	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2585 	if (r != 0)
2586 		goto out;
2587 
2588 	r = vioif_ctrl_send_command(sc,
2589 	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2590 	    specs, nspecs);
2591 
2592 	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2593 
2594 out:
2595 	vioif_ctrl_release(sc);
2596 
2597 	return r;
2598 }
2599 
2600 /*
2601  * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter.
2602  * If the multicast list is too large, fall back to ALLMULTI.
2603  * Also fall back to ALLMULTI if programming the rx filter fails.
2604  */
2605 static int
2606 vioif_rx_filter(struct vioif_softc *sc)
2607 {
2608 	struct virtio_softc *vsc = sc->sc_virtio;
2609 	struct ethercom *ec = &sc->sc_ethercom;
2610 	struct ifnet *ifp = &ec->ec_if;
2611 	struct ether_multi *enm;
2612 	struct ether_multistep step;
2613 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2614 	int nentries;
2615 	bool allmulti = false;
2616 	int r;
2617 
2618 	if (!sc->sc_has_ctrl) {
2619 		goto set_ifflags;
2620 	}
2621 
2622 	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2623 	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2624 
2625 	nentries = 0;
2626 	allmulti = false;
2627 
2628 	ETHER_LOCK(ec);
2629 	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2630 	    ETHER_NEXT_MULTI(step, enm)) {
2631 		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2632 			allmulti = true;
2633 			break;
2634 		}
2635 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2636 			allmulti = true;
2637 			break;
2638 		}
2639 
2640 		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2641 		    enm->enm_addrlo, ETHER_ADDR_LEN);
2642 		nentries++;
2643 	}
2644 	ETHER_UNLOCK(ec);
2645 
2646 	r = vioif_set_mac_addr(sc);
2647 	if (r != 0) {
2648 		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2649 		    ifp->if_xname);
2650 	}
2651 
2652 	if (!allmulti) {
2653 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2654 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2655 		r = vioif_set_rx_filter(sc);
2656 		if (r != 0) {
2657 			allmulti = true; /* fallback */
2658 		}
2659 	}
2660 
2661 	if (allmulti) {
2662 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2663 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2664 		r = vioif_set_rx_filter(sc);
2665 		if (r != 0) {
2666 			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2667 			    ifp->if_xname);
2668 			/* what to do on failure? */
2669 		}
2670 
2671 		ifp->if_flags |= IFF_ALLMULTI;
2672 	}
2673 
2674 set_ifflags:
2675 	r = vioif_ifflags(sc);
2676 
2677 	return r;
2678 }
2679 
2680 /*
2681  * VM configuration changes
2682  */
2683 static int
2684 vioif_config_change(struct virtio_softc *vsc)
2685 {
2686 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2687 
2688 	softint_schedule(sc->sc_cfg_softint);
2689 	return 0;
2690 }
2691 
2692 static void
2693 vioif_cfg_softint(void *arg)
2694 {
2695 	struct vioif_softc *sc = arg;
2696 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2697 
2698 	vioif_update_link_status(sc);
2699 	vioif_start(ifp);
2700 }
2701 
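/* read the link state from device config; without VIRTIO_NET_F_STATUS the link is assumed up */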
2702 static int
2703 vioif_get_link_status(struct vioif_softc *sc)
2704 {
2705 	struct virtio_softc *vsc = sc->sc_virtio;
2706 	uint16_t status;
2707 
2708 	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2709 		status = virtio_read_device_config_2(vsc,
2710 		    VIRTIO_NET_CONFIG_STATUS);
2711 	else
2712 		status = VIRTIO_NET_S_LINK_UP;
2713 
2714 	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2715 		return LINK_STATE_UP;
2716 
2717 	return LINK_STATE_DOWN;
2718 }
2719 
2720 static void
2721 vioif_update_link_status(struct vioif_softc *sc)
2722 {
2723 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2724 	struct vioif_netqueue *netq;
2725 	struct vioif_tx_context *txc;
2726 	bool active;
2727 	int link, i;
2728 
2729 	mutex_enter(&sc->sc_lock);
2730 
2731 	link = vioif_get_link_status(sc);
2732 
2733 	if (link == sc->sc_link_state)
2734 		goto done;
2735 
2736 	sc->sc_link_state = link;
2737 
2738 	active = VIOIF_IS_LINK_ACTIVE(sc);
2739 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2740 		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2741 
2742 		mutex_enter(&netq->netq_lock);
2743 		txc = netq->netq_ctx;
2744 		txc->txc_link_active = active;
2745 		mutex_exit(&netq->netq_lock);
2746 	}
2747 
2748 	if_link_state_change(ifp, sc->sc_link_state);
2749 
2750 done:
2751 	mutex_exit(&sc->sc_lock);
2752 }
2753 
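/*
 * Workqueue wrappers: work->added is cleared before the callback runs and
 * checked in vioif_work_add() so that the same work is never enqueued twice.
 */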
2754 static void
2755 vioif_workq_work(struct work *wk, void *context)
2756 {
2757 	struct vioif_work *work;
2758 
2759 	work = container_of(wk, struct vioif_work, cookie);
2760 
2761 	atomic_store_relaxed(&work->added, 0);
2762 	work->func(work->arg);
2763 }
2764 
2765 static struct workqueue *
2766 vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2767 {
2768 	struct workqueue *wq;
2769 	int error;
2770 
2771 	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2772 	    prio, ipl, flags);
2773 
2774 	if (error)
2775 		return NULL;
2776 
2777 	return wq;
2778 }
2779 
2780 static void
2781 vioif_workq_destroy(struct workqueue *wq)
2782 {
2783 
2784 	workqueue_destroy(wq);
2785 }
2786 
2787 static void
2788 vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2789 {
2790 
2791 	memset(work, 0, sizeof(*work));
2792 	work->func = func;
2793 	work->arg = arg;
2794 }
2795 
2796 static void
2797 vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2798 {
2799 
2800 	if (atomic_load_relaxed(&work->added) != 0)
2801 		return;
2802 
2803 	atomic_store_relaxed(&work->added, 1);
2804 	kpreempt_disable();
2805 	workqueue_enqueue(wq, &work->cookie, NULL);
2806 	kpreempt_enable();
2807 }
2808 
2809 static void
2810 vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2811 {
2812 
2813 	workqueue_wait(wq, &work->cookie);
2814 }
2815 
2816 MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2817 
2818 #ifdef _MODULE
2819 #include "ioconf.c"
2820 #endif
2821 
2822 static int
2823 if_vioif_modcmd(modcmd_t cmd, void *opaque)
2824 {
2825 	int error = 0;
2826 
2827 #ifdef _MODULE
2828 	switch (cmd) {
2829 	case MODULE_CMD_INIT:
2830 		error = config_init_component(cfdriver_ioconf_if_vioif,
2831 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2832 		break;
2833 	case MODULE_CMD_FINI:
2834 		error = config_fini_component(cfdriver_ioconf_if_vioif,
2835 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2836 		break;
2837 	default:
2838 		error = ENOTTY;
2839 		break;
2840 	}
2841 #endif
2842 
2843 	return error;
2844 }
2845