xref: /netbsd-src/sys/dev/pci/if_vioif.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1 /*	$NetBSD: if_vioif.c,v 1.112 2024/06/29 12:11:12 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.112 2024/06/29 12:11:12 riastradh Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_net_mpsafe.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/atomic.h>
40 #include <sys/bus.h>
41 #include <sys/condvar.h>
42 #include <sys/device.h>
43 #include <sys/evcnt.h>
44 #include <sys/intr.h>
45 #include <sys/kmem.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/sockio.h>
49 #include <sys/syslog.h>
50 #include <sys/cpu.h>
51 #include <sys/module.h>
52 #include <sys/pcq.h>
53 #include <sys/workqueue.h>
54 #include <sys/xcall.h>
55 
56 #include <dev/pci/virtioreg.h>
57 #include <dev/pci/virtiovar.h>
58 
59 #include <net/if.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_ether.h>
63 
64 #include <net/bpf.h>
65 
66 #include "ioconf.h"
67 
68 #ifdef NET_MPSAFE
69 #define VIOIF_MPSAFE	1
70 #define VIOIF_MULTIQ	1
71 #endif
72 
73 /*
74  * if_vioifreg.h:
75  */
76 /* Configuration registers */
77 #define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78 #define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80 #define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81 
82 /* Feature bits */
83 #define VIRTIO_NET_F_CSUM		__BIT(0)
84 #define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85 #define VIRTIO_NET_F_MAC		__BIT(5)
86 #define VIRTIO_NET_F_GSO		__BIT(6)
87 #define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88 #define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89 #define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90 #define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91 #define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92 #define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93 #define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94 #define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95 #define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96 #define VIRTIO_NET_F_STATUS		__BIT(16)
97 #define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98 #define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99 #define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100 #define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101 #define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102 #define VIRTIO_NET_F_MQ			__BIT(22)
103 #define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104 
105 #define VIRTIO_NET_FLAG_BITS			\
106 	VIRTIO_COMMON_FLAG_BITS			\
107 	"b\x17" "CTRL_MAC\0"			\
108 	"b\x16" "MQ\0"				\
109 	"b\x15" "GUEST_ANNOUNCE\0"		\
110 	"b\x14" "CTRL_RX_EXTRA\0"		\
111 	"b\x13" "CTRL_VLAN\0"			\
112 	"b\x12" "CTRL_RX\0"			\
113 	"b\x11" "CTRL_VQ\0"			\
114 	"b\x10" "STATUS\0"			\
115 	"b\x0f" "MRG_RXBUF\0"			\
116 	"b\x0e" "HOST_UFO\0"			\
117 	"b\x0d" "HOST_ECN\0"			\
118 	"b\x0c" "HOST_TSO6\0"			\
119 	"b\x0b" "HOST_TSO4\0"			\
120 	"b\x0a" "GUEST_UFO\0"			\
121 	"b\x09" "GUEST_ECN\0"			\
122 	"b\x08" "GUEST_TSO6\0"			\
123 	"b\x07" "GUEST_TSO4\0"			\
124 	"b\x06" "GSO\0"				\
125 	"b\x05" "MAC\0"				\
126 	"b\x01" "GUEST_CSUM\0"			\
127 	"b\x00" "CSUM\0"
128 
129 /* Status */
130 #define VIRTIO_NET_S_LINK_UP	1
131 
132 /* Packet header structure */
133 struct virtio_net_hdr {
134 	uint8_t		flags;
135 	uint8_t		gso_type;
136 	uint16_t	hdr_len;
137 	uint16_t	gso_size;
138 	uint16_t	csum_start;
139 	uint16_t	csum_offset;
140 
141 	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142 } __packed;
143 
144 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150 
151 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152 
153 /* Control virtqueue */
154 struct virtio_net_ctrl_cmd {
155 	uint8_t	class;
156 	uint8_t	command;
157 } __packed;
158 #define VIRTIO_NET_CTRL_RX		0
159 # define VIRTIO_NET_CTRL_RX_PROMISC	0
160 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161 
162 #define VIRTIO_NET_CTRL_MAC		1
163 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164 # define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165 
166 #define VIRTIO_NET_CTRL_VLAN		2
167 # define VIRTIO_NET_CTRL_VLAN_ADD	0
168 # define VIRTIO_NET_CTRL_VLAN_DEL	1
169 
170 #define VIRTIO_NET_CTRL_MQ			4
171 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174 
175 struct virtio_net_ctrl_status {
176 	uint8_t	ack;
177 } __packed;
178 #define VIRTIO_NET_OK			0
179 #define VIRTIO_NET_ERR			1
180 
181 struct virtio_net_ctrl_rx {
182 	uint8_t	onoff;
183 } __packed;
184 
185 struct virtio_net_ctrl_mac_tbl {
186 	uint32_t nentries;
187 	uint8_t macs[][ETHER_ADDR_LEN];
188 } __packed;
189 
190 struct virtio_net_ctrl_mac_addr {
191 	uint8_t mac[ETHER_ADDR_LEN];
192 } __packed;
193 
194 struct virtio_net_ctrl_vlan {
195 	uint16_t id;
196 } __packed;
197 
198 struct virtio_net_ctrl_mq {
199 	uint16_t virtqueue_pairs;
200 } __packed;
201 
202 /*
203  * if_vioifvar.h:
204  */
205 
206 /*
207  * Locking notes:
208  * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
209  *      - more than one netq_lock cannot be held at once
210  * + a field in vioif_tx_context and vioif_rx_context is also protected
211  *   by netq_lock.
212  * + ctrlq_inuse is protected by ctrlq_wait_lock.
213  *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214  *      - netq_lock cannot be held along with ctrlq_wait_lock
215  * + fields in vioif_softc except queues are protected by
216  *   sc->sc_lock (an adaptive mutex)
217  *      - the lock is held before acquisition of other locks
218  */
219 
220 struct vioif_ctrl_cmdspec {
221 	bus_dmamap_t	dmamap;
222 	void		*buf;
223 	bus_size_t	bufsize;
224 };
225 
226 struct vioif_work {
227 	struct work	 cookie;
228 	void		(*func)(void *);
229 	void		*arg;
230 	unsigned int	 added;
231 };
232 
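/*
 * Per-slot DMA state: each virtqueue slot owns a virtio-net header
 * (vnm_hdr, loaded into vnm_hdr_map once when the maps are set up) and,
 * while a frame is in flight, an mbuf loaded into vnm_mbuf_map.
 */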
233 struct vioif_net_map {
234 	struct virtio_net_hdr	*vnm_hdr;
235 	bus_dmamap_t		 vnm_hdr_map;
236 	struct mbuf		*vnm_mbuf;
237 	bus_dmamap_t		 vnm_mbuf_map;
238 };
239 
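/*
 * Virtqueue index layout: the RX and TX queues of pair n are interleaved,
 * so the flat queue id is n * VIOIF_NETQ_IDX + direction, where direction
 * is VIOIF_NETQ_RX (0) or VIOIF_NETQ_TX (1).  The macros below convert
 * between a flat queue id and a (pair, direction) tuple.
 */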
240 #define VIOIF_NETQ_RX		0
241 #define VIOIF_NETQ_TX		1
242 #define VIOIF_NETQ_IDX		2
243 #define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244 #define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245 #define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246 #define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
247 
248 struct vioif_netqueue {
249 	kmutex_t		 netq_lock;
250 	struct virtqueue	*netq_vq;
251 	bool			 netq_stopping;
252 	bool			 netq_running_handle;
253 	void			*netq_maps_kva;
254 	struct vioif_net_map	*netq_maps;
255 
256 	void			*netq_softint;
257 	struct vioif_work	 netq_work;
258 	bool			 netq_workqueue;
259 
260 	char			 netq_evgroup[32];
261 	struct evcnt		 netq_mbuf_load_failed;
262 	struct evcnt		 netq_enqueue_failed;
263 
264 	void			*netq_ctx;
265 };
266 
267 struct vioif_tx_context {
268 	bool			 txc_link_active;
269 	bool			 txc_no_free_slots;
270 	pcq_t			*txc_intrq;
271 	void			*txc_deferred_transmit;
272 
273 	struct evcnt		 txc_defrag_failed;
274 };
275 
276 struct vioif_rx_context {
277 	struct evcnt		 rxc_mbuf_enobufs;
278 };
279 struct vioif_ctrlqueue {
280 	struct virtqueue		*ctrlq_vq;
281 	enum {
282 		FREE, INUSE, DONE
283 	}				ctrlq_inuse;
284 	kcondvar_t			ctrlq_wait;
285 	kmutex_t			ctrlq_wait_lock;
286 	struct lwp			*ctrlq_owner;
287 
288 	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289 	struct virtio_net_ctrl_status	*ctrlq_status;
290 	struct virtio_net_ctrl_rx	*ctrlq_rx;
291 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293 	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294 	struct virtio_net_ctrl_mq	*ctrlq_mq;
295 
296 	bus_dmamap_t			ctrlq_cmd_dmamap;
297 	bus_dmamap_t			ctrlq_status_dmamap;
298 	bus_dmamap_t			ctrlq_rx_dmamap;
299 	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300 	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301 	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302 	bus_dmamap_t			ctrlq_mq_dmamap;
303 
304 	struct evcnt			ctrlq_cmd_load_failed;
305 	struct evcnt			ctrlq_cmd_failed;
306 };
307 
308 struct vioif_softc {
309 	device_t		sc_dev;
310 	kmutex_t		sc_lock;
311 	struct sysctllog	*sc_sysctllog;
312 
313 	struct virtio_softc	*sc_virtio;
314 	struct virtqueue	*sc_vqs;
315 	u_int			 sc_hdr_size;
316 
317 	int			sc_max_nvq_pairs;
318 	int			sc_req_nvq_pairs;
319 	int			sc_act_nvq_pairs;
320 
321 	uint8_t			sc_mac[ETHER_ADDR_LEN];
322 	struct ethercom		sc_ethercom;
323 	int			sc_link_state;
324 
325 	struct vioif_netqueue	*sc_netqs;
326 
327 	bool			sc_has_ctrl;
328 	struct vioif_ctrlqueue	sc_ctrlq;
329 
330 	bus_dma_segment_t	 sc_segs[1];
331 	void			*sc_dmamem;
332 	void			*sc_kmem;
333 
334 	void			*sc_cfg_softint;
335 
336 	struct workqueue	*sc_txrx_workqueue;
337 	bool			 sc_txrx_workqueue_sysctl;
338 	u_int			 sc_tx_intr_process_limit;
339 	u_int			 sc_tx_process_limit;
340 	u_int			 sc_rx_intr_process_limit;
341 	u_int			 sc_rx_process_limit;
342 };
343 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345 
346 #define VIOIF_TX_INTR_PROCESS_LIMIT	256
347 #define VIOIF_TX_PROCESS_LIMIT		256
348 #define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349 #define VIOIF_RX_PROCESS_LIMIT		256
350 
351 #define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352 #define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353 					    true : false)
354 
355 /* cfattach interface functions */
356 static int	vioif_match(device_t, cfdata_t, void *);
357 static void	vioif_attach(device_t, device_t, void *);
358 static int	vioif_finalize_teardown(device_t);
359 
360 /* ifnet interface functions */
361 static int	vioif_init(struct ifnet *);
362 static void	vioif_stop(struct ifnet *, int);
363 static void	vioif_start(struct ifnet *);
364 static int	vioif_transmit(struct ifnet *, struct mbuf *);
365 static int	vioif_ioctl(struct ifnet *, u_long, void *);
366 static void	vioif_watchdog(struct ifnet *);
367 static int	vioif_ifflags(struct vioif_softc *);
368 static int	vioif_ifflags_cb(struct ethercom *);
369 
370 /* tx & rx */
371 static int	vioif_netqueue_init(struct vioif_softc *,
372 		    struct virtio_softc *, size_t, u_int);
373 static void	vioif_netqueue_teardown(struct vioif_softc *,
374 		    struct virtio_softc *, size_t);
375 static void	vioif_net_intr_enable(struct vioif_softc *,
376 		    struct virtio_softc *);
377 static void	vioif_net_intr_disable(struct vioif_softc *,
378 		    struct virtio_softc *);
379 static void	vioif_net_sched_handle(struct vioif_softc *,
380 		    struct vioif_netqueue *);
381 
382 /* rx */
383 static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384 		    struct vioif_netqueue *);
385 static int	vioif_rx_intr(void *);
386 static void	vioif_rx_handle(void *);
387 static void	vioif_rx_queue_clear(struct vioif_softc *,
388 		    struct virtio_softc *, struct vioif_netqueue *);
389 
390 /* tx */
391 static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392 static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393 static void	vioif_deferred_transmit(void *);
394 static int	vioif_tx_intr(void *);
395 static void	vioif_tx_handle(void *);
396 static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397 		    struct vioif_netqueue *);
398 
399 /* controls */
400 static int	vioif_ctrl_intr(void *);
401 static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402 static int	vioif_set_promisc(struct vioif_softc *, bool);
403 static int	vioif_set_allmulti(struct vioif_softc *, bool);
404 static int	vioif_set_rx_filter(struct vioif_softc *);
405 static int	vioif_rx_filter(struct vioif_softc *);
406 static int	vioif_set_mac_addr(struct vioif_softc *);
407 static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408 
409 /* config interrupt */
410 static int	vioif_config_change(struct virtio_softc *);
411 static void	vioif_cfg_softint(void *);
412 static void	vioif_update_link_status(struct vioif_softc *);
413 
414 /* others */
415 static void	vioif_alloc_queues(struct vioif_softc *);
416 static void	vioif_free_queues(struct vioif_softc *);
417 static int	vioif_alloc_mems(struct vioif_softc *);
418 static struct workqueue*
419 		vioif_workq_create(const char *, pri_t, int, int);
420 static void	vioif_workq_destroy(struct workqueue *);
421 static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422 static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423 static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424 static int	vioif_setup_sysctl(struct vioif_softc *);
425 static void	vioif_setup_stats(struct vioif_softc *);
426 
427 CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428 		  vioif_match, vioif_attach, NULL, NULL);
429 
430 static void
431 vioif_intr_barrier(void)
432 {
433 
434 	/* wait for all interrupt handlers to finish */
435 	xc_barrier(0);
436 }
437 
438 static void
439 vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440 {
441 
442 	virtio_enqueue_commit(vsc, vq, -1, true);
443 }
444 
445 static int
446 vioif_match(device_t parent, cfdata_t match, void *aux)
447 {
448 	struct virtio_attach_args *va = aux;
449 
450 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451 		return 1;
452 
453 	return 0;
454 }
455 
456 static void
457 vioif_attach(device_t parent, device_t self, void *aux)
458 {
459 	struct vioif_softc *sc = device_private(self);
460 	struct virtio_softc *vsc = device_private(parent);
461 	struct vioif_netqueue *txq0;
462 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463 	uint64_t features, req_features;
464 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465 	u_int softint_flags;
466 	int r, i, req_flags;
467 	char xnamebuf[MAXCOMLEN];
468 	size_t nvqs;
469 
470 	if (virtio_child(vsc) != NULL) {
471 		aprint_normal(": child already attached for %s; "
472 		    "something wrong...\n", device_xname(parent));
473 		return;
474 	}
475 
476 	sc->sc_dev = self;
477 	sc->sc_virtio = vsc;
478 	sc->sc_link_state = LINK_STATE_UNKNOWN;
479 
480 	sc->sc_max_nvq_pairs = 1;
481 	sc->sc_req_nvq_pairs = 1;
482 	sc->sc_act_nvq_pairs = 1;
483 	sc->sc_txrx_workqueue_sysctl = true;
484 	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485 	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486 	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487 	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488 
489 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490 
491 	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492 	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493 	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494 	if (sc->sc_txrx_workqueue == NULL)
495 		goto err;
496 
497 	req_flags = 0;
498 
499 #ifdef VIOIF_MPSAFE
500 	req_flags |= VIRTIO_F_INTR_MPSAFE;
501 #endif
502 	req_flags |= VIRTIO_F_INTR_MSIX;
503 
504 	req_features =
505 	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506 	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507 	req_features |= VIRTIO_F_RING_EVENT_IDX;
508 	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509 #ifdef VIOIF_MULTIQ
510 	req_features |= VIRTIO_NET_F_MQ;
511 #endif
512 
513 	virtio_child_attach_start(vsc, self, IPL_NET,
514 	    req_features, VIRTIO_NET_FLAG_BITS);
515 	features = virtio_features(vsc);
516 
517 	if (features == 0)
518 		goto err;
519 
520 	if (features & VIRTIO_NET_F_MAC) {
521 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522 			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523 			    VIRTIO_NET_CONFIG_MAC + i);
524 		}
525 	} else {
526 		/* code stolen from sys/net/if_tap.c */
527 		struct timeval tv;
528 		uint32_t ui;
529 		getmicrouptime(&tv);
530 		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531 		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533 			virtio_write_device_config_1(vsc,
534 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535 		}
536 	}
537 
538 	/* 'Ethernet' with a capital letter follows other Ethernet drivers' attach output */
539 	aprint_normal_dev(self, "Ethernet address %s\n",
540 	    ether_sprintf(sc->sc_mac));
541 
542 	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544 	} else {
545 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546 	}
547 
548 	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549 	    (features & VIRTIO_NET_F_CTRL_RX)) {
550 		sc->sc_has_ctrl = true;
551 
552 		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553 		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554 		ctrlq->ctrlq_inuse = FREE;
555 	} else {
556 		sc->sc_has_ctrl = false;
557 	}
558 
559 	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560 		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561 		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562 
563 		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564 			goto err;
565 
566 		/* Limit the number of queue pairs to use */
567 		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568 
569 		if (sc->sc_max_nvq_pairs > 1)
570 			req_flags |= VIRTIO_F_INTR_PERVQ;
571 	}
572 
573 	vioif_alloc_queues(sc);
574 
575 #ifdef VIOIF_MPSAFE
576 	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577 #else
578 	softint_flags = SOFTINT_NET;
579 #endif
580 
581 	/*
582 	 * Initialize network queues
583 	 */
584 	nvqs = sc->sc_max_nvq_pairs * 2;
585 	for (i = 0; i < nvqs; i++) {
586 		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587 		if (r != 0)
588 			goto err;
589 	}
590 
591 	if (sc->sc_has_ctrl) {
592 		int ctrlq_idx = nvqs;
593 
594 		nvqs++;
595 		/*
596 		 * Allocate a virtqueue for the control channel
597 		 */
598 		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599 		virtio_init_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600 		    vioif_ctrl_intr, ctrlq);
601 
602 		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, NBPG, 1, "control");
603 		if (r != 0) {
604 			aprint_error_dev(self, "failed to allocate "
605 			    "a virtqueue for control channel, error code %d\n",
606 			    r);
607 
608 			sc->sc_has_ctrl = false;
609 			cv_destroy(&ctrlq->ctrlq_wait);
610 			mutex_destroy(&ctrlq->ctrlq_wait_lock);
611 		}
612 	}
613 
614 	sc->sc_cfg_softint = softint_establish(softint_flags,
615 	    vioif_cfg_softint, sc);
616 	if (sc->sc_cfg_softint == NULL) {
617 		aprint_error_dev(self, "cannot establish ctl softint\n");
618 		goto err;
619 	}
620 
621 	if (vioif_alloc_mems(sc) < 0)
622 		goto err;
623 
624 	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
625 	    vioif_config_change, req_flags);
626 	if (r != 0)
627 		goto err;
628 
629 	if (vioif_setup_sysctl(sc) != 0) {
630 		aprint_error_dev(self, "unable to create sysctl node\n");
631 		/* continue */
632 	}
633 
634 	vioif_setup_stats(sc);
635 
636 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
637 	ifp->if_softc = sc;
638 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
639 #ifdef VIOIF_MPSAFE
640 	ifp->if_extflags = IFEF_MPSAFE;
641 #endif
642 	ifp->if_start = vioif_start;
643 	if (sc->sc_req_nvq_pairs > 1)
644 		ifp->if_transmit = vioif_transmit;
645 	ifp->if_ioctl = vioif_ioctl;
646 	ifp->if_init = vioif_init;
647 	ifp->if_stop = vioif_stop;
648 	ifp->if_capabilities = 0;
649 	ifp->if_watchdog = vioif_watchdog;
650 	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
651 	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
652 	IFQ_SET_READY(&ifp->if_snd);
653 
654 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
655 
656 	if_attach(ifp);
657 	if_deferred_start_init(ifp, NULL);
658 	ether_ifattach(ifp, sc->sc_mac);
659 	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
660 
661 	return;
662 
663 err:
664 	nvqs = sc->sc_max_nvq_pairs * 2;
665 	for (i = 0; i < nvqs; i++) {
666 		vioif_netqueue_teardown(sc, vsc, i);
667 	}
668 
669 	if (sc->sc_has_ctrl) {
670 		cv_destroy(&ctrlq->ctrlq_wait);
671 		mutex_destroy(&ctrlq->ctrlq_wait_lock);
672 		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
673 		ctrlq->ctrlq_vq = NULL;
674 	}
675 
676 	vioif_free_queues(sc);
677 	mutex_destroy(&sc->sc_lock);
678 	virtio_child_attach_failed(vsc);
679 	config_finalize_register(self, vioif_finalize_teardown);
680 
681 	return;
682 }
683 
684 static int
685 vioif_finalize_teardown(device_t self)
686 {
687 	struct vioif_softc *sc = device_private(self);
688 
689 	if (sc->sc_txrx_workqueue != NULL) {
690 		vioif_workq_destroy(sc->sc_txrx_workqueue);
691 		sc->sc_txrx_workqueue = NULL;
692 	}
693 
694 	return 0;
695 }
696 
697 /*
698  * Interface functions for ifnet
699  */
700 static int
701 vioif_init(struct ifnet *ifp)
702 {
703 	struct vioif_softc *sc = ifp->if_softc;
704 	struct virtio_softc *vsc = sc->sc_virtio;
705 	struct vioif_netqueue *netq;
706 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
707 	int r, i;
708 
709 	vioif_stop(ifp, 0);
710 
711 	r = virtio_reinit_start(vsc);
712 	if (r != 0) {
713 		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
714 		return EIO;
715 	}
716 
717 	virtio_negotiate_features(vsc, virtio_features(vsc));
718 
719 	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
720 		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
721 
722 		mutex_enter(&netq->netq_lock);
723 		vioif_populate_rx_mbufs_locked(sc, netq);
724 		mutex_exit(&netq->netq_lock);
725 	}
726 
727 	virtio_reinit_end(vsc);
728 
729 	if (sc->sc_has_ctrl)
730 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
731 
732 	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
733 	if (r == 0)
734 		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
735 	else
736 		sc->sc_act_nvq_pairs = 1;
737 
738 	SET(ifp->if_flags, IFF_RUNNING);
739 
740 	vioif_net_intr_enable(sc, vsc);
741 
742 	vioif_update_link_status(sc);
743 	r = vioif_rx_filter(sc);
744 
745 	return r;
746 }
747 
748 static void
749 vioif_stop(struct ifnet *ifp, int disable)
750 {
751 	struct vioif_softc *sc = ifp->if_softc;
752 	struct virtio_softc *vsc = sc->sc_virtio;
753 	struct vioif_netqueue *netq;
754 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
755 	size_t i, act_qnum;
756 
757 	act_qnum = sc->sc_act_nvq_pairs * 2;
758 
759 	CLR(ifp->if_flags, IFF_RUNNING);
760 	for (i = 0; i < act_qnum; i++) {
761 		netq = &sc->sc_netqs[i];
762 
763 		mutex_enter(&netq->netq_lock);
764 		netq->netq_stopping = true;
765 		mutex_exit(&netq->netq_lock);
766 	}
767 
768 	/* disable interrupts */
769 	vioif_net_intr_disable(sc, vsc);
770 	if (sc->sc_has_ctrl)
771 		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
772 
773 	/*
774 	 * The only way to stop interrupts, I/O and DMA is to reset the device.
775 	 *
776 	 * NOTE: Devices based on the VirtIO draft specification cannot
777 	 * stop interrupts completely even if virtio_stop_vq_intr() is called.
778 	 */
779 	virtio_reset(vsc);
780 
781 	vioif_intr_barrier();
782 
783 	for (i = 0; i < act_qnum; i++) {
784 		netq = &sc->sc_netqs[i];
785 		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
786 	}
787 
788 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
789 		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
790 		vioif_rx_queue_clear(sc, vsc, netq);
791 
792 		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
793 		vioif_tx_queue_clear(sc, vsc, netq);
794 	}
795 
796 	/* all packet processing is stopped */
797 	for (i = 0; i < act_qnum; i++) {
798 		netq = &sc->sc_netqs[i];
799 
800 		mutex_enter(&netq->netq_lock);
801 		netq->netq_stopping = false;
802 		mutex_exit(&netq->netq_lock);
803 	}
804 }
805 
806 static void
807 vioif_start(struct ifnet *ifp)
808 {
809 	struct vioif_softc *sc = ifp->if_softc;
810 	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
811 
812 #ifdef VIOIF_MPSAFE
813 	KASSERT(if_is_mpsafe(ifp));
814 #endif
815 
816 	mutex_enter(&txq0->netq_lock);
817 	vioif_start_locked(ifp, txq0);
818 	mutex_exit(&txq0->netq_lock);
819 }
820 
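/*
 * Select the TX queue for an outgoing packet from the current CPU index,
 * so that with enough active queue pairs each CPU tends to use its own
 * TX virtqueue.
 */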
821 static inline int
822 vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
823 {
824 	struct vioif_softc *sc = ifp->if_softc;
825 	u_int cpuid = cpu_index(curcpu());
826 
827 	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
828 }
829 
830 static int
831 vioif_transmit(struct ifnet *ifp, struct mbuf *m)
832 {
833 	struct vioif_softc *sc = ifp->if_softc;
834 	struct vioif_netqueue *netq;
835 	struct vioif_tx_context *txc;
836 	int qid;
837 
838 	qid = vioif_select_txqueue(ifp, m);
839 	netq = &sc->sc_netqs[qid];
840 	txc = netq->netq_ctx;
841 
842 	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
843 		m_freem(m);
844 		return ENOBUFS;
845 	}
846 
847 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
848 	if_statadd_ref(ifp, nsr, if_obytes, m->m_pkthdr.len);
849 	if (m->m_flags & M_MCAST)
850 		if_statinc_ref(ifp, nsr, if_omcasts);
851 	IF_STAT_PUTREF(ifp);
852 
853 	if (mutex_tryenter(&netq->netq_lock)) {
854 		vioif_transmit_locked(ifp, netq);
855 		mutex_exit(&netq->netq_lock);
856 	}
857 
858 	return 0;
859 }
860 
861 static void
862 vioif_watchdog(struct ifnet *ifp)
863 {
864 	struct vioif_softc *sc = ifp->if_softc;
865 	struct vioif_netqueue *netq;
866 	int i;
867 
868 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
869 		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
870 			log(LOG_DEBUG, "%s: watchdog timed out\n",
871 			    ifp->if_xname);
872 		}
873 
874 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
875 			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
876 
877 			mutex_enter(&netq->netq_lock);
878 			if (!netq->netq_running_handle) {
879 				netq->netq_running_handle = true;
880 				vioif_net_sched_handle(sc, netq);
881 			}
882 			mutex_exit(&netq->netq_lock);
883 		}
884 	}
885 }
886 
887 static int
888 vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889 {
890 	int s, r;
891 
892 	s = splnet();
893 
894 	r = ether_ioctl(ifp, cmd, data);
895 	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
896 		if (ifp->if_flags & IFF_RUNNING) {
897 			r = vioif_rx_filter(ifp->if_softc);
898 		} else {
899 			r = 0;
900 		}
901 	}
902 
903 	splx(s);
904 
905 	return r;
906 }
907 
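/*
 * Push IFF_PROMISC and IFF_ALLMULTI to the device through the control
 * virtqueue.  If a command fails, the corresponding interface flag is
 * reverted so that the software state keeps matching the device state.
 */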
908 static int
909 vioif_ifflags(struct vioif_softc *sc)
910 {
911 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
912 	bool onoff;
913 	int r;
914 
915 	if (!sc->sc_has_ctrl) {
916 		/* no ctrl vq; always promisc and allmulti */
917 		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
918 		return 0;
919 	}
920 
921 	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
922 	r = vioif_set_allmulti(sc, onoff);
923 	if (r != 0) {
924 		log(LOG_WARNING,
925 		    "%s: couldn't %sable ALLMULTI\n",
926 		    ifp->if_xname, onoff ? "en" : "dis");
927 		if (onoff) {
928 			CLR(ifp->if_flags, IFF_ALLMULTI);
929 		} else {
930 			SET(ifp->if_flags, IFF_ALLMULTI);
931 		}
932 	}
933 
934 	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
935 	r = vioif_set_promisc(sc, onoff);
936 	if (r != 0) {
937 		log(LOG_WARNING,
938 		    "%s: couldn't %sable PROMISC\n",
939 		    ifp->if_xname, onoff ? "en" : "dis");
940 		if (onoff) {
941 			CLR(ifp->if_flags, IFF_PROMISC);
942 		} else {
943 			SET(ifp->if_flags, IFF_PROMISC);
944 		}
945 	}
946 
947 	return 0;
948 }
949 
950 static int
951 vioif_ifflags_cb(struct ethercom *ec)
952 {
953 	struct ifnet *ifp = &ec->ec_if;
954 	struct vioif_softc *sc = ifp->if_softc;
955 
956 	return vioif_ifflags(sc);
957 }
958 
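/*
 * Create the per-device sysctl subtree under hw.<device>: txrx_workqueue,
 * plus rx and tx nodes each carrying intr_process_limit and process_limit.
 * For example (assuming the first device is vioif0):
 *   sysctl -w hw.vioif0.rx.process_limit=512
 */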
959 static int
960 vioif_setup_sysctl(struct vioif_softc *sc)
961 {
962 	const char *devname;
963 	struct sysctllog **log;
964 	const struct sysctlnode *rnode, *rxnode, *txnode;
965 	int error;
966 
967 	log = &sc->sc_sysctllog;
968 	devname = device_xname(sc->sc_dev);
969 
970 	error = sysctl_createv(log, 0, NULL, &rnode,
971 	    0, CTLTYPE_NODE, devname,
972 	    SYSCTL_DESCR("virtio-net information and settings"),
973 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
974 	if (error)
975 		goto out;
976 
977 	error = sysctl_createv(log, 0, &rnode, NULL,
978 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
979 	    SYSCTL_DESCR("Use workqueue for packet processing"),
980 	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
981 	if (error)
982 		goto out;
983 
984 	error = sysctl_createv(log, 0, &rnode, &rxnode,
985 	    0, CTLTYPE_NODE, "rx",
986 	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
987 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
988 	if (error)
989 		goto out;
990 
991 	error = sysctl_createv(log, 0, &rxnode, NULL,
992 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
993 	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
994 	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
995 	if (error)
996 		goto out;
997 
998 	error = sysctl_createv(log, 0, &rxnode, NULL,
999 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1000 	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1001 	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1002 	if (error)
1003 		goto out;
1004 
1005 	error = sysctl_createv(log, 0, &rnode, &txnode,
1006 	    0, CTLTYPE_NODE, "tx",
1007 	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1008 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1009 	if (error)
1010 		goto out;
1011 
1012 	error = sysctl_createv(log, 0, &txnode, NULL,
1013 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1014 	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1015 	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1016 	if (error)
1017 		goto out;
1018 
1019 	error = sysctl_createv(log, 0, &txnode, NULL,
1020 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1021 	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1022 	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1023 
1024 out:
1025 	if (error)
1026 		sysctl_teardown(log);
1027 
1028 	return error;
1029 }
1030 
1031 static void
1032 vioif_setup_stats(struct vioif_softc *sc)
1033 {
1034 	struct vioif_netqueue *netq;
1035 	struct vioif_tx_context *txc;
1036 	struct vioif_rx_context *rxc;
1037 	size_t i, netq_num;
1038 
1039 	netq_num = sc->sc_max_nvq_pairs * 2;
1040 	for (i = 0; i < netq_num; i++) {
1041 		netq = &sc->sc_netqs[i];
1042 		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1043 		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1044 		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1045 		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1046 		    "virtqueue enqueue failed");
1047 
1048 		switch (VIOIF_NETQ_DIR(i)) {
1049 		case VIOIF_NETQ_RX:
1050 			rxc = netq->netq_ctx;
1051 			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1052 			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1053 			    "no receive buffer");
1054 			break;
1055 		case VIOIF_NETQ_TX:
1056 			txc = netq->netq_ctx;
1057 			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1058 			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1059 			    "m_defrag() failed");
1060 			break;
1061 		}
1062 	}
1063 
1064 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1065 	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1066 	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1067 	    NULL, device_xname(sc->sc_dev), "control command failed");
1068 }
1069 
1070 /*
1071  * allocate memory
1072  */
1073 static int
1074 vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1075     bus_size_t size, int nsegs, const char *usage)
1076 {
1077 	int r;
1078 
1079 	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1080 	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1081 
1082 	if (r != 0) {
1083 		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1084 		    "error code %d\n", usage, r);
1085 	}
1086 
1087 	return r;
1088 }
1089 
1090 static void
1091 vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1092 {
1093 
1094 	if (*map) {
1095 		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1096 		*map = NULL;
1097 	}
1098 }
1099 
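/*
 * Create a dmamap and immediately load a fixed-size buffer into it; used
 * for the virtio-net headers and for the control queue structures whose
 * sizes are known up front.
 */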
1100 static int
1101 vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1102     void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1103 {
1104 	int r;
1105 
1106 	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1107 	if (r != 0)
1108 		return 1;
1109 
1110 	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1111 	    size, NULL, rw | BUS_DMA_NOWAIT);
1112 	if (r != 0) {
1113 		vioif_dmamap_destroy(sc, map);
1114 		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1115 		    "error code %d\n", usage, r);
1116 	}
1117 
1118 	return r;
1119 }
1120 
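/*
 * Carve "size" bytes off the region at *p and advance the cursor; used to
 * hand out slices of the single DMA and kmem allocations made in
 * vioif_alloc_mems().
 */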
1121 static void *
1122 vioif_assign_mem(intptr_t *p, size_t size)
1123 {
1124 	intptr_t rv;
1125 
1126 	rv = *p;
1127 	*p += size;
1128 
1129 	return (void *)rv;
1130 }
1131 
1132 /*
1133  * dma memory is used for:
1134  *   netq_maps_kva:	 metadata array for received frames (READ) and
1135  *			 sent frames (WRITE)
1136  *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1137  *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1138  *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1139  *			 (WRITE)
1140  *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1141  *			 class command (WRITE)
1142  *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1143  *			 class command (WRITE)
1144  * Only one of each ctrlq_* structure is allocated; they are protected by
1145  * the ctrlq_inuse variable and the ctrlq_wait condvar.
1146  */
1147 static int
1148 vioif_alloc_mems(struct vioif_softc *sc)
1149 {
1150 	struct virtio_softc *vsc = sc->sc_virtio;
1151 	struct vioif_netqueue *netq;
1152 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1153 	struct vioif_net_map *maps;
1154 	unsigned int vq_num;
1155 	int r, rsegs;
1156 	bus_size_t dmamemsize;
1157 	size_t qid, i, netq_num, kmemsize;
1158 	void *vaddr;
1159 	intptr_t p;
1160 
1161 	netq_num = sc->sc_max_nvq_pairs * 2;
1162 
1163 	/* allocate DMA memory */
1164 	dmamemsize = 0;
1165 
1166 	for (qid = 0; qid < netq_num; qid++) {
1167 		maps = sc->sc_netqs[qid].netq_maps;
1168 		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1169 		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1170 	}
1171 
1172 	if (sc->sc_has_ctrl) {
1173 		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1174 		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1175 		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1176 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1177 		    + ETHER_ADDR_LEN;
1178 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1179 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1180 		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1181 		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1182 	}
1183 
1184 	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1185 	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1186 	if (r != 0) {
1187 		aprint_error_dev(sc->sc_dev,
1188 		    "DMA memory allocation failed, size %" PRIuBUSSIZE ", "
1189 		    "error code %d\n", dmamemsize, r);
1190 		goto err_none;
1191 	}
1192 	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1193 	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1194 	if (r != 0) {
1195 		aprint_error_dev(sc->sc_dev,
1196 		    "DMA memory map failed, error code %d\n", r);
1197 		goto err_dmamem_alloc;
1198 	}
1199 
1200 	/* assign DMA memory */
1201 	memset(vaddr, 0, dmamemsize);
1202 	sc->sc_dmamem = vaddr;
1203 	p = (intptr_t) vaddr;
1204 
1205 	for (qid = 0; qid < netq_num; qid++) {
1206 		netq = &sc->sc_netqs[qid];
1207 		maps = netq->netq_maps;
1208 		vq_num = netq->netq_vq->vq_num;
1209 
1210 		netq->netq_maps_kva = vioif_assign_mem(&p,
1211 		    sizeof(*maps[0].vnm_hdr) * vq_num);
1212 	}
1213 
1214 	if (sc->sc_has_ctrl) {
1215 		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1216 		    sizeof(*ctrlq->ctrlq_cmd));
1217 		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1218 		    sizeof(*ctrlq->ctrlq_status));
1219 		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1220 		    sizeof(*ctrlq->ctrlq_rx));
1221 		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1222 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1223 		    + ETHER_ADDR_LEN);
1224 		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1225 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1226 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1227 		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1228 		    sizeof(*ctrlq->ctrlq_mac_addr));
1229 		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1230 	}
1231 
1232 	/* allocate kmem */
1233 	kmemsize = 0;
1234 
1235 	for (qid = 0; qid < netq_num; qid++) {
1236 		netq = &sc->sc_netqs[qid];
1237 		vq_num = netq->netq_vq->vq_num;
1238 
1239 		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1240 	}
1241 
1242 	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1243 	sc->sc_kmem = vaddr;
1244 
1245 	/* assign allocated kmem */
1246 	p = (intptr_t) vaddr;
1247 
1248 	for (qid = 0; qid < netq_num; qid++) {
1249 		netq = &sc->sc_netqs[qid];
1250 		vq_num = netq->netq_vq->vq_num;
1251 
1252 		netq->netq_maps = vioif_assign_mem(&p,
1253 		    sizeof(netq->netq_maps[0]) * vq_num);
1254 	}
1255 
1256 	/* prepare dmamaps */
1257 	for (qid = 0; qid < netq_num; qid++) {
1258 		static const struct {
1259 			const char	*msg_hdr;
1260 			const char	*msg_payload;
1261 			int		 dma_flag;
1262 			bus_size_t	 dma_size;
1263 			int		 dma_nsegs;
1264 		} dmaparams[VIOIF_NETQ_IDX] = {
1265 			[VIOIF_NETQ_RX] = {
1266 				.msg_hdr	= "rx header",
1267 				.msg_payload	= "rx payload",
1268 				.dma_flag	= BUS_DMA_READ,
1269 				.dma_size	= MCLBYTES - ETHER_ALIGN,
1270 				.dma_nsegs	= 1,
1271 			},
1272 			[VIOIF_NETQ_TX] = {
1273 				.msg_hdr	= "tx header",
1274 				.msg_payload	= "tx payload",
1275 				.dma_flag	= BUS_DMA_WRITE,
1276 				.dma_size	= ETHER_MAX_LEN,
1277 				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1278 			}
1279 		};
1280 
1281 		struct virtio_net_hdr *hdrs;
1282 		int dir;
1283 		int nsegs;
1284 
1285 		dir = VIOIF_NETQ_DIR(qid);
1286 		netq = &sc->sc_netqs[qid];
1287 		vq_num = netq->netq_vq->vq_num;
1288 		maps = netq->netq_maps;
1289 		hdrs = netq->netq_maps_kva;
1290 		nsegs = uimin(dmaparams[dir].dma_nsegs, vq_num - 1/*hdr*/);
1291 
1292 		for (i = 0; i < vq_num; i++) {
1293 			maps[i].vnm_hdr = &hdrs[i];
1294 
1295 			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1296 			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1297 			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1298 			if (r != 0)
1299 				goto err_reqs;
1300 
1301 			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1302 			    dmaparams[dir].dma_size, nsegs,
1303 			    dmaparams[dir].msg_payload);
1304 			if (r != 0)
1305 				goto err_reqs;
1306 		}
1307 	}
1308 
1309 	if (sc->sc_has_ctrl) {
1310 		/* control vq class & command */
1311 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1312 		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1313 		    BUS_DMA_WRITE, "control command");
1314 		if (r != 0)
1315 			goto err_reqs;
1316 
1317 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1318 		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1319 		    BUS_DMA_READ, "control status");
1320 		if (r != 0)
1321 			goto err_reqs;
1322 
1323 		/* control vq rx mode command parameter */
1324 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1325 		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1326 		    BUS_DMA_WRITE, "rx mode control command");
1327 		if (r != 0)
1328 			goto err_reqs;
1329 
1330 		/* multiqueue set command */
1331 		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1332 		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1333 		    BUS_DMA_WRITE, "multiqueue set command");
1334 		if (r != 0)
1335 			goto err_reqs;
1336 
1337 		/* control vq MAC filter table for unicast */
1338 		/* do not load now since its length is variable */
1339 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1340 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1341 		    + ETHER_ADDR_LEN, 1,
1342 		    "unicast MAC address filter command");
1343 		if (r != 0)
1344 			goto err_reqs;
1345 
1346 		/* control vq MAC filter table for multicast */
1347 		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1348 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1349 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1350 		    "multicast MAC address filter command");
1351 		if (r != 0)
1352 			goto err_reqs;
1353 
1354 		/* control vq MAC address set command */
1355 		r = vioif_dmamap_create_load(sc,
1356 		    &ctrlq->ctrlq_mac_addr_dmamap,
1357 		    ctrlq->ctrlq_mac_addr,
1358 		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1359 		    BUS_DMA_WRITE, "mac addr set command");
1360 		if (r != 0)
1361 			goto err_reqs;
1362 	}
1363 
1364 	return 0;
1365 
1366 err_reqs:
1367 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1368 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1369 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1370 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1371 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1372 	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1373 	for (qid = 0; qid < netq_num; qid++) {
1374 		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1375 		maps = sc->sc_netqs[qid].netq_maps;
1376 
1377 		for (i = 0; i < vq_num; i++) {
1378 			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1379 			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1380 		}
1381 	}
1382 	if (sc->sc_kmem) {
1383 		kmem_free(sc->sc_kmem, kmemsize);
1384 		sc->sc_kmem = NULL;
1385 	}
1386 	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1387 err_dmamem_alloc:
1388 	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1389 err_none:
1390 	return -1;
1391 }
1392 
1393 static void
1394 vioif_alloc_queues(struct vioif_softc *sc)
1395 {
1396 	int nvq_pairs = sc->sc_max_nvq_pairs;
1397 	size_t nvqs, netq_num;
1398 
1399 	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1400 
1401 	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1402 	if (sc->sc_has_ctrl)
1403 		nvqs++;
1404 
1405 	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1406 	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1407 	    KM_SLEEP);
1408 }
1409 
1410 static void
1411 vioif_free_queues(struct vioif_softc *sc)
1412 {
1413 	size_t nvqs, netq_num;
1414 
1415 	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1416 	if (sc->sc_ctrlq.ctrlq_vq)
1417 		nvqs++;
1418 
1419 	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1420 	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1421 	sc->sc_netqs = NULL;
1422 	sc->sc_vqs = NULL;
1423 }
1424 
1425 /*
1426  * Network queues
1427  */
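/*
 * Set up one network virtqueue: its mutex, the virtqueue itself, a softint
 * and a workqueue work item for deferred processing, and the
 * direction-specific context (vioif_rx_context or vioif_tx_context).
 */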
1428 static int
1429 vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1430     size_t qid, u_int softint_flags)
1431 {
1432 	static const struct {
1433 		const char	*dirname;
1434 		int		 segsize;
1435 		int		 nsegs;
1436 		int 		(*intrhand)(void *);
1437 		void		(*sihand)(void *);
1438 	} params[VIOIF_NETQ_IDX] = {
1439 		[VIOIF_NETQ_RX] = {
1440 			.dirname	= "rx",
1441 			.segsize	= MCLBYTES,
1442 			.nsegs		= 2,
1443 			.intrhand	= vioif_rx_intr,
1444 			.sihand		= vioif_rx_handle,
1445 		},
1446 		[VIOIF_NETQ_TX] = {
1447 			.dirname	= "tx",
1448 			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1449 			.nsegs		= 2,
1450 			.intrhand	= vioif_tx_intr,
1451 			.sihand		= vioif_tx_handle,
1452 		}
1453 	};
1454 
1455 	struct virtqueue *vq;
1456 	struct vioif_netqueue *netq;
1457 	struct vioif_tx_context *txc;
1458 	struct vioif_rx_context *rxc;
1459 	char qname[32];
1460 	int r, dir;
1461 
1462 	txc = NULL;
1463 	rxc = NULL;
1464 	netq = &sc->sc_netqs[qid];
1465 	vq = &sc->sc_vqs[qid];
1466 	dir = VIOIF_NETQ_DIR(qid);
1467 
1468 	netq->netq_vq = &sc->sc_vqs[qid];
1469 	netq->netq_stopping = false;
1470 	netq->netq_running_handle = false;
1471 
1472 	snprintf(qname, sizeof(qname), "%s%zu",
1473 	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1474 	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1475 	    "%s-%s", device_xname(sc->sc_dev), qname);
1476 
1477 	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1478 	virtio_init_vq(vsc, vq, qid, params[dir].intrhand, netq);
1479 
1480 	r = virtio_alloc_vq(vsc, vq,
1481 	    params[dir].segsize + sc->sc_hdr_size,
1482 	    params[dir].nsegs, qname);
1483 	if (r != 0)
1484 		goto err;
1485 	netq->netq_vq = vq;
1486 
1487 	netq->netq_softint = softint_establish(softint_flags,
1488 	    params[dir].sihand, netq);
1489 	if (netq->netq_softint == NULL) {
1490 		aprint_error_dev(sc->sc_dev,
1491 		    "couldn't establish %s softint\n",
1492 		    params[dir].dirname);
1493 		goto err;
1494 	}
1495 	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1496 
1497 	switch (dir) {
1498 	case VIOIF_NETQ_RX:
1499 		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1500 		netq->netq_ctx = rxc;
1501 		/* nothing to do */
1502 		break;
1503 	case VIOIF_NETQ_TX:
1504 		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1505 		netq->netq_ctx = (void *)txc;
1506 		txc->txc_deferred_transmit = softint_establish(softint_flags,
1507 		    vioif_deferred_transmit, netq);
1508 		if (txc->txc_deferred_transmit == NULL) {
1509 			aprint_error_dev(sc->sc_dev,
1510 			    "couldn't establish softint for "
1511 			    "tx deferred transmit\n");
1512 			goto err;
1513 		}
1514 		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1515 		txc->txc_no_free_slots = false;
1516 		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1517 		break;
1518 	}
1519 
1520 	return 0;
1521 
1522 err:
1523 	netq->netq_ctx = NULL;
1524 
1525 	if (rxc != NULL) {
1526 		kmem_free(rxc, sizeof(*rxc));
1527 	}
1528 
1529 	if (txc != NULL) {
1530 		if (txc->txc_deferred_transmit != NULL)
1531 			softint_disestablish(txc->txc_deferred_transmit);
1532 		if (txc->txc_intrq != NULL)
1533 			pcq_destroy(txc->txc_intrq);
1534 		kmem_free(txc, sizeof(*txc));
1535 	}
1536 
1537 	vioif_work_set(&netq->netq_work, NULL, NULL);
1538 	if (netq->netq_softint != NULL) {
1539 		softint_disestablish(netq->netq_softint);
1540 		netq->netq_softint = NULL;
1541 	}
1542 
1543 	virtio_free_vq(vsc, vq);
1544 	mutex_destroy(&netq->netq_lock);
1545 	netq->netq_vq = NULL;
1546 
1547 	return -1;
1548 }
1549 
1550 static void
1551 vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1552     size_t qid)
1553 {
1554 	struct vioif_netqueue *netq;
1555 	struct vioif_rx_context *rxc;
1556 	struct vioif_tx_context *txc;
1557 	int dir;
1558 
1559 	netq = &sc->sc_netqs[qid];
1560 
1561 	if (netq->netq_vq == NULL)
1562 		return;
1563 
1564 	netq = &sc->sc_netqs[qid];
1565 	dir = VIOIF_NETQ_DIR(qid);
1566 	switch (dir) {
1567 	case VIOIF_NETQ_RX:
1568 		rxc = netq->netq_ctx;
1569 		netq->netq_ctx = NULL;
1570 		kmem_free(rxc, sizeof(*rxc));
1571 		break;
1572 	case VIOIF_NETQ_TX:
1573 		txc = netq->netq_ctx;
1574 		netq->netq_ctx = NULL;
1575 		softint_disestablish(txc->txc_deferred_transmit);
1576 		pcq_destroy(txc->txc_intrq);
1577 		kmem_free(txc, sizeof(*txc));
1578 		break;
1579 	}
1580 
1581 	softint_disestablish(netq->netq_softint);
1582 	virtio_free_vq(vsc, netq->netq_vq);
1583 	mutex_destroy(&netq->netq_lock);
1584 	netq->netq_vq = NULL;
1585 }
1586 
1587 static void
1588 vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1589 {
1590 
1591 	KASSERT(mutex_owned(&netq->netq_lock));
1592 	KASSERT(!netq->netq_stopping);
1593 
1594 	if (netq->netq_workqueue) {
1595 		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1596 	} else {
1597 		softint_schedule(netq->netq_softint);
1598 	}
1599 }
1600 
1601 static int
1602 vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1603    struct mbuf *m, int dma_flags)
1604 {
1605 	int r;
1606 
1607 	KASSERT(map->vnm_mbuf == NULL);
1608 
1609 	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1610 	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1611 	if (r == 0) {
1612 		map->vnm_mbuf = m;
1613 	}
1614 
1615 	return r;
1616 }
1617 
1618 static void
1619 vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1620 {
1621 
1622 	KASSERT(map->vnm_mbuf != NULL);
1623 	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1624 	map->vnm_mbuf = NULL;
1625 }
1626 
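/*
 * Post one frame to a virtqueue: reserve descriptors for the header and
 * the mbuf chain, sync both dmamaps, enqueue them, and commit the slot
 * without notifying the device; callers batch notification through
 * vioif_notify().
 */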
1627 static int
1628 vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1629     int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1630 {
1631 	int r;
1632 
1633 	KASSERT(map->vnm_mbuf != NULL);
1634 
1635 	/* This should actually never fail */
1636 	r = virtio_enqueue_reserve(vsc, vq, slot,
1637 	    map->vnm_mbuf_map->dm_nsegs + 1);
1638 	if (r != 0) {
1639 		/* slot already freed by virtio_enqueue_reserve */
1640 		return r;
1641 	}
1642 
1643 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1644 	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1645 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1646 	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1647 
1648 	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1649 	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1650 	virtio_enqueue_commit(vsc, vq, slot, false);
1651 
1652 	return 0;
1653 }
1654 
1655 static int
1656 vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1657     int slot, struct vioif_net_map *map)
1658 {
1659 
1660 	return vioif_net_enqueue(vsc, vq, slot, map,
1661 	    BUS_DMASYNC_PREWRITE, true);
1662 }
1663 
1664 static int
1665 vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1666     int slot, struct vioif_net_map *map)
1667 {
1668 
1669 	return vioif_net_enqueue(vsc, vq, slot, map,
1670 	    BUS_DMASYNC_PREREAD, false);
1671 }
1672 
1673 static struct mbuf *
1674 vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1675    int slot, struct vioif_net_map *map, int dma_flags)
1676 {
1677 	struct mbuf *m;
1678 
1679 	m = map->vnm_mbuf;
1680 	KASSERT(m != NULL);
1681 	map->vnm_mbuf = NULL;
1682 
1683 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1684 	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1685 	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1686 	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1687 
1688 	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1689 	virtio_dequeue_commit(vsc, vq, slot);
1690 
1691 	return m;
1692 }
1693 
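/*
 * Re-enable virtqueue interrupts on all active queues.  If descriptors
 * were used while interrupts were off (virtio_start_vq_intr() returns
 * non-zero), interrupts are disabled again and the handler is scheduled
 * so that those descriptors are not missed.
 */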
1694 static void
1695 vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1696 {
1697 	struct vioif_netqueue *netq;
1698 	size_t i, act_qnum;
1699 	int enqueued;
1700 
1701 	act_qnum = sc->sc_act_nvq_pairs * 2;
1702 	for (i = 0; i < act_qnum; i++) {
1703 		netq = &sc->sc_netqs[i];
1704 
1705 		KASSERT(!netq->netq_stopping);
1706 		KASSERT(!netq->netq_running_handle);
1707 
1708 		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1709 		if (enqueued != 0) {
1710 			virtio_stop_vq_intr(vsc, netq->netq_vq);
1711 
1712 			mutex_enter(&netq->netq_lock);
1713 			netq->netq_running_handle = true;
1714 			vioif_net_sched_handle(sc, netq);
1715 			mutex_exit(&netq->netq_lock);
1716 		}
1717 	}
1718 }
1719 
1720 static void
1721 vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1722 {
1723 	struct vioif_netqueue *netq;
1724 	size_t i, act_qnum;
1725 
1726 	act_qnum = sc->sc_act_nvq_pairs * 2;
1727 	for (i = 0; i < act_qnum; i++) {
1728 		netq = &sc->sc_netqs[i];
1729 
1730 		virtio_stop_vq_intr(vsc, netq->netq_vq);
1731 	}
1732 }
1733 
1734 /*
1735  * Receive implementation
1736  */
1737 /* enqueue mbufs to receive slots */
1738 static void
1739 vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1740 {
1741 	struct virtqueue *vq = netq->netq_vq;
1742 	struct virtio_softc *vsc = vq->vq_owner;
1743 	struct vioif_rx_context *rxc;
1744 	struct vioif_net_map *map;
1745 	struct mbuf *m;
1746 	int i, r, ndone = 0;
1747 
1748 	KASSERT(mutex_owned(&netq->netq_lock));
1749 
1750 	rxc = netq->netq_ctx;
1751 
1752 	for (i = 0; i < vq->vq_num; i++) {
1753 		int slot;
1754 		r = virtio_enqueue_prep(vsc, vq, &slot);
1755 		if (r == EAGAIN)
1756 			break;
1757 		if (__predict_false(r != 0))
1758 			panic("enqueue_prep for rx buffers");
1759 
1760 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1761 		if (m == NULL) {
1762 			virtio_enqueue_abort(vsc, vq, slot);
1763 			rxc->rxc_mbuf_enobufs.ev_count++;
1764 			break;
1765 		}
1766 		MCLGET(m, M_DONTWAIT);
1767 		if ((m->m_flags & M_EXT) == 0) {
1768 			virtio_enqueue_abort(vsc, vq, slot);
1769 			m_freem(m);
1770 			rxc->rxc_mbuf_enobufs.ev_count++;
1771 			break;
1772 		}
1773 
1774 		m->m_len = m->m_pkthdr.len = MCLBYTES;
1775 		m_adj(m, ETHER_ALIGN);
1776 
1777 		map = &netq->netq_maps[slot];
1778 		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1779 		if (r != 0) {
1780 			virtio_enqueue_abort(vsc, vq, slot);
1781 			m_freem(m);
1782 			netq->netq_mbuf_load_failed.ev_count++;
1783 			break;
1784 		}
1785 
1786 		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1787 		if (r != 0) {
1788 			vioif_net_unload_mbuf(vsc, map);
1789 			netq->netq_enqueue_failed.ev_count++;
1790 			m_freem(m);
1791 			/* slot already freed by vioif_net_enqueue_rx */
1792 			break;
1793 		}
1794 
1795 		ndone++;
1796 	}
1797 
1798 	if (ndone > 0)
1799 		vioif_notify(vsc, vq);
1800 }
1801 
1802 /* dequeue received packets */
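/*
 * Returns true when "limit" was reached and more used descriptors may
 * remain; the number of frames actually dequeued is stored in *ndeqp
 * when it is not NULL.
 */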
1803 static bool
1804 vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1805     struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1806 {
1807 	struct virtqueue *vq = netq->netq_vq;
1808 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1809 	struct vioif_net_map *map;
1810 	struct mbuf *m;
1811 	int slot, len;
1812 	bool more;
1813 	size_t ndeq;
1814 
1815 	KASSERT(mutex_owned(&netq->netq_lock));
1816 
1817 	more = false;
1818 	ndeq = 0;
1819 
1820 	if (virtio_vq_is_enqueued(vsc, vq) == false)
1821 		goto done;
1822 
1823 	for (;;ndeq++) {
1824 		if (ndeq >= limit) {
1825 			more = true;
1826 			break;
1827 		}
1828 
1829 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1830 			break;
1831 
1832 		map = &netq->netq_maps[slot];
1833 		KASSERT(map->vnm_mbuf != NULL);
1834 		m = vioif_net_dequeue_commit(vsc, vq, slot,
1835 		    map, BUS_DMASYNC_POSTREAD);
1836 		KASSERT(m != NULL);
1837 
1838 		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1839 		m_set_rcvif(m, ifp);
1840 		if_percpuq_enqueue(ifp->if_percpuq, m);
1841 	}
1842 
1843 done:
1844 	if (ndeqp != NULL)
1845 		*ndeqp = ndeq;
1846 
1847 	return more;
1848 }
1849 
1850 static void
1851 vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1852     struct vioif_netqueue *netq)
1853 {
1854 	struct vioif_net_map *map;
1855 	struct mbuf *m;
1856 	unsigned int i, vq_num;
1857 	bool more;
1858 
1859 	mutex_enter(&netq->netq_lock);
1860 
1861 	vq_num = netq->netq_vq->vq_num;
1862 	for (;;) {
1863 		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1864 		if (more == false)
1865 			break;
1866 	}
1867 
1868 	for (i = 0; i < vq_num; i++) {
1869 		map = &netq->netq_maps[i];
1870 
1871 		m = map->vnm_mbuf;
1872 		if (m == NULL)
1873 			continue;
1874 
1875 		vioif_net_unload_mbuf(vsc, map);
1876 		m_freem(m);
1877 	}
1878 	mutex_exit(&netq->netq_lock);
1879 }
1880 
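/*
 * Core of the RX softint/workqueue handler: dequeue up to "limit" frames,
 * refill the RX ring, and either reschedule itself (more work pending, or
 * new descriptors arrived while re-arming the interrupt) or re-enable the
 * interrupt and clear netq_running_handle.
 */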
1881 static void
1882 vioif_rx_handle_locked(void *xnetq, u_int limit)
1883 {
1884 	struct vioif_netqueue *netq = xnetq;
1885 	struct virtqueue *vq = netq->netq_vq;
1886 	struct virtio_softc *vsc = vq->vq_owner;
1887 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1888 	bool more;
1889 	int enqueued;
1890 	size_t ndeq;
1891 
1892 	KASSERT(mutex_owned(&netq->netq_lock));
1893 	KASSERT(!netq->netq_stopping);
1894 
1895 	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1896 	if (ndeq > 0)
1897 		vioif_populate_rx_mbufs_locked(sc, netq);
1898 
1899 	if (more) {
1900 		vioif_net_sched_handle(sc, netq);
1901 		return;
1902 	}
1903 
1904 	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1905 	if (enqueued != 0) {
1906 		virtio_stop_vq_intr(vsc, netq->netq_vq);
1907 		vioif_net_sched_handle(sc, netq);
1908 		return;
1909 	}
1910 
1911 	netq->netq_running_handle = false;
1912 }
1913 
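/* rx interrupt: mask further vq interrupts and process packets inline */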
1914 static int
1915 vioif_rx_intr(void *arg)
1916 {
1917 	struct vioif_netqueue *netq = arg;
1918 	struct virtqueue *vq = netq->netq_vq;
1919 	struct virtio_softc *vsc = vq->vq_owner;
1920 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1921 	u_int limit;
1922 
1923 	mutex_enter(&netq->netq_lock);
1924 
1925 	/* handler is already running in softint/workqueue */
1926 	if (netq->netq_running_handle)
1927 		goto done;
1928 
1929 	if (netq->netq_stopping)
1930 		goto done;
1931 
1932 	netq->netq_running_handle = true;
1933 
1934 	limit = sc->sc_rx_intr_process_limit;
1935 	virtio_stop_vq_intr(vsc, vq);
1936 	vioif_rx_handle_locked(netq, limit);
1937 
1938 done:
1939 	mutex_exit(&netq->netq_lock);
1940 	return 1;
1941 }
1942 
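/* deferred rx handler; runs in softint or workqueue context */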
1943 static void
1944 vioif_rx_handle(void *xnetq)
1945 {
1946 	struct vioif_netqueue *netq = xnetq;
1947 	struct virtqueue *vq = netq->netq_vq;
1948 	struct virtio_softc *vsc = vq->vq_owner;
1949 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1950 	u_int limit;
1951 
1952 	mutex_enter(&netq->netq_lock);
1953 
1954 	KASSERT(netq->netq_running_handle);
1955 
1956 	if (netq->netq_stopping) {
1957 		netq->netq_running_handle = false;
1958 		goto done;
1959 	}
1960 
1961 	limit = sc->sc_rx_process_limit;
1962 	vioif_rx_handle_locked(netq, limit);
1963 
1964 done:
1965 	mutex_exit(&netq->netq_lock);
1966 }
1967 
1968 /*
1969  * Transmission implementation
1970  */
1971 /* enqueue mbufs to send */
1972 static void
1973 vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1974     bool is_transmit)
1975 {
1976 	struct vioif_softc *sc = ifp->if_softc;
1977 	struct virtio_softc *vsc = sc->sc_virtio;
1978 	struct virtqueue *vq = netq->netq_vq;
1979 	struct vioif_tx_context *txc;
1980 	struct vioif_net_map *map;
1981 	struct mbuf *m;
1982 	int queued = 0;
1983 
1984 	KASSERT(mutex_owned(&netq->netq_lock));
1985 
1986 	if (netq->netq_stopping ||
1987 	    !ISSET(ifp->if_flags, IFF_RUNNING))
1988 		return;
1989 
1990 	txc = netq->netq_ctx;
1991 
1992 	if (!txc->txc_link_active ||
1993 	    txc->txc_no_free_slots)
1994 		return;
1995 
1996 	for (;;) {
1997 		int slot, r;
1998 		r = virtio_enqueue_prep(vsc, vq, &slot);
1999 		if (r == EAGAIN) {
2000 			txc->txc_no_free_slots = true;
2001 			break;
2002 		}
2003 		if (__predict_false(r != 0))
2004 			panic("enqueue_prep for tx buffers");
2005 
2006 		if (is_transmit)
2007 			m = pcq_get(txc->txc_intrq);
2008 		else
2009 			IFQ_DEQUEUE(&ifp->if_snd, m);
2010 
2011 		if (m == NULL) {
2012 			virtio_enqueue_abort(vsc, vq, slot);
2013 			break;
2014 		}
2015 
2016 		map = &netq->netq_maps[slot];
2017 		KASSERT(map->vnm_mbuf == NULL);
2018 
2019 		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2020 		if (r != 0) {
2021 			/* maybe just too fragmented */
2022 			struct mbuf *newm;
2023 
2024 			newm = m_defrag(m, M_NOWAIT);
2025 			if (newm != NULL) {
2026 				m = newm;
2027 				r = vioif_net_load_mbuf(vsc, map, m,
2028 				    BUS_DMA_WRITE);
2029 			} else {
2030 				txc->txc_defrag_failed.ev_count++;
2031 				r = -1;
2032 			}
2033 
2034 			if (r != 0) {
2035 				netq->netq_mbuf_load_failed.ev_count++;
2036 				m_freem(m);
2037 				if_statinc(ifp, if_oerrors);
2038 				virtio_enqueue_abort(vsc, vq, slot);
2039 				continue;
2040 			}
2041 		}
2042 
2043 		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2044 
2045 		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2046 		if (r != 0) {
2047 			netq->netq_enqueue_failed.ev_count++;
2048 			vioif_net_unload_mbuf(vsc, map);
2049 			m_freem(m);
2050 			/* slot already freed by vioif_net_enqueue_tx */
2051 
2052 			if_statinc(ifp, if_oerrors);
2053 			continue;
2054 		}
2055 
2056 		queued++;
2057 		bpf_mtap(ifp, m, BPF_D_OUT);
2058 	}
2059 
2060 	if (queued > 0) {
2061 		vioif_notify(vsc, vq);
2062 		ifp->if_timer = 5;
2063 	}
2064 }
2065 
2066 /* dequeue sent mbufs */
2067 static bool
2068 vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2069     struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2070 {
2071 	struct virtqueue *vq = netq->netq_vq;
2072 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2073 	struct vioif_net_map *map;
2074 	struct mbuf *m;
2075 	int slot, len;
2076 	bool more;
2077 	size_t ndeq;
2078 
2079 	KASSERT(mutex_owned(&netq->netq_lock));
2080 
2081 	more = false;
2082 	ndeq = 0;
2083 
2084 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2085 		goto done;
2086 
2087 	for (;;ndeq++) {
2088 		if (limit-- == 0) {
2089 			more = true;
2090 			break;
2091 		}
2092 
2093 		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2094 			break;
2095 
2096 		map = &netq->netq_maps[slot];
2097 		KASSERT(map->vnm_mbuf != NULL);
2098 		m = vioif_net_dequeue_commit(vsc, vq, slot,
2099 		    map, BUS_DMASYNC_POSTWRITE);
2100 		KASSERT(m != NULL);
2101 
2102 		if_statinc(ifp, if_opackets);
2103 		m_freem(m);
2104 	}
2105 
2106 done:
2107 	if (ndeqp != NULL)
2108 		*ndeqp = ndeq;
2109 	return more;
2110 }
2111 
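/* reclaim the tx virtqueue and free every mbuf still held by its slots */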
2112 static void
2113 vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2114     struct vioif_netqueue *netq)
2115 {
2116 	struct vioif_tx_context *txc;
2117 	struct vioif_net_map *map;
2118 	struct mbuf *m;
2119 	unsigned int i, vq_num;
2120 	bool more;
2121 
2122 	mutex_enter(&netq->netq_lock);
2123 
2124 	txc = netq->netq_ctx;
2125 	vq_num = netq->netq_vq->vq_num;
2126 
2127 	for (;;) {
2128 		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2129 		if (more == false)
2130 			break;
2131 	}
2132 
2133 	for (i = 0; i < vq_num; i++) {
2134 		map = &netq->netq_maps[i];
2135 
2136 		m = map->vnm_mbuf;
2137 		if (m == NULL)
2138 			continue;
2139 
2140 		vioif_net_unload_mbuf(vsc, map);
2141 		m_freem(m);
2142 	}
2143 
2144 	txc->txc_no_free_slots = false;
2145 
2146 	mutex_exit(&netq->netq_lock);
2147 }
2148 
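/* if_start/if_transmit backends; both feed vioif_send_common_locked() */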
2149 static void
2150 vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2151 {
2152 
2153 	/*
2154 	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2155 	 */
2156 	vioif_send_common_locked(ifp, netq, false);
2158 }
2159 
2160 static void
2161 vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2162 {
2163 
2164 	vioif_send_common_locked(ifp, netq, true);
2165 }
2166 
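/* softint to resume transmission once tx slots have been reclaimed */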
2167 static void
2168 vioif_deferred_transmit(void *arg)
2169 {
2170 	struct vioif_netqueue *netq = arg;
2171 	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2172 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2173 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2174 
2175 	mutex_enter(&netq->netq_lock);
2176 	vioif_send_common_locked(ifp, netq, true);
2177 	mutex_exit(&netq->netq_lock);
2178 }
2179 
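/*
 * tx handler core: reclaim completed transmissions, restart a queue that
 * stalled for lack of slots, then re-arm (or, with EVENT_IDX, postpone)
 * the tx interrupt, rescheduling if completions raced in meanwhile
 */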
2180 static void
2181 vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2182 {
2183 	struct virtqueue *vq = netq->netq_vq;
2184 	struct vioif_tx_context *txc = netq->netq_ctx;
2185 	struct virtio_softc *vsc = vq->vq_owner;
2186 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2187 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2188 	bool more;
2189 	int enqueued;
2190 	size_t ndeq;
2191 
2192 	KASSERT(mutex_owned(&netq->netq_lock));
2193 	KASSERT(!netq->netq_stopping);
2194 
2195 	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2196 	if (txc->txc_no_free_slots && ndeq > 0) {
2197 		txc->txc_no_free_slots = false;
2198 		softint_schedule(txc->txc_deferred_transmit);
2199 	}
2200 
2201 	if (more) {
2202 		vioif_net_sched_handle(sc, netq);
2203 		return;
2204 	}
2205 
2206 	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2207 	    virtio_postpone_intr_smart(vsc, vq):
2208 	    virtio_start_vq_intr(vsc, vq);
2209 	if (enqueued != 0) {
2210 		virtio_stop_vq_intr(vsc, vq);
2211 		vioif_net_sched_handle(sc, netq);
2212 		return;
2213 	}
2214 
2215 	netq->netq_running_handle = false;
2216 
2217 	/* for ALTQ */
2218 	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2219 		if_schedule_deferred_start(ifp);
2220 
2221 	softint_schedule(txc->txc_deferred_transmit);
2222 }
2223 
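/* tx completion interrupt: mask further vq interrupts and run the handler */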
2224 static int
2225 vioif_tx_intr(void *arg)
2226 {
2227 	struct vioif_netqueue *netq = arg;
2228 	struct virtqueue *vq = netq->netq_vq;
2229 	struct virtio_softc *vsc = vq->vq_owner;
2230 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2231 	u_int limit;
2232 
2233 	mutex_enter(&netq->netq_lock);
2234 
2235 	/* tx handler is already running in softint/workqueue */
2236 	if (netq->netq_running_handle)
2237 		goto done;
2238 
2239 	if (netq->netq_stopping)
2240 		goto done;
2241 
2242 	netq->netq_running_handle = true;
2243 
2244 	virtio_stop_vq_intr(vsc, vq);
2245 	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2246 	limit = sc->sc_tx_intr_process_limit;
2247 	vioif_tx_handle_locked(netq, limit);
2248 
2249 done:
2250 	mutex_exit(&netq->netq_lock);
2251 	return 1;
2252 }
2253 
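/* deferred tx handler; runs in softint or workqueue context */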
2254 static void
2255 vioif_tx_handle(void *xnetq)
2256 {
2257 	struct vioif_netqueue *netq = xnetq;
2258 	struct virtqueue *vq = netq->netq_vq;
2259 	struct virtio_softc *vsc = vq->vq_owner;
2260 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2261 	u_int limit;
2262 
2263 	mutex_enter(&netq->netq_lock);
2264 
2265 	KASSERT(netq->netq_running_handle);
2266 
2267 	if (netq->netq_stopping) {
2268 		netq->netq_running_handle = false;
2269 		goto done;
2270 	}
2271 
2272 	limit = sc->sc_tx_process_limit;
2273 	vioif_tx_handle_locked(netq, limit);
2274 
2275 done:
2276 	mutex_exit(&netq->netq_lock);
2277 }
2278 
2279 /*
2280  * Control vq
2281  */
2282 /* serialize access to the control virtqueue; wait until it is free */
2283 static void
2284 vioif_ctrl_acquire(struct vioif_softc *sc)
2285 {
2286 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2287 
2288 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2289 	while (ctrlq->ctrlq_inuse != FREE)
2290 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2291 	ctrlq->ctrlq_inuse = INUSE;
2292 	ctrlq->ctrlq_owner = curlwp;
2293 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2294 }
2295 
2296 static void
2297 vioif_ctrl_release(struct vioif_softc *sc)
2298 {
2299 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2300 
2301 	KASSERT(ctrlq->ctrlq_inuse != FREE);
2302 	KASSERT(ctrlq->ctrlq_owner == curlwp);
2303 
2304 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2305 	ctrlq->ctrlq_inuse = FREE;
2306 	ctrlq->ctrlq_owner = NULL;
2307 	cv_signal(&ctrlq->ctrlq_wait);
2308 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2309 }
2310 
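/* load (and on error unwind) the DMA maps carrying control command payloads */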
2311 static int
2312 vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2313     struct vioif_ctrl_cmdspec *specs, int nspecs)
2314 {
2315 	struct virtio_softc *vsc = sc->sc_virtio;
2316 	int i, r, loaded;
2317 
2318 	loaded = 0;
2319 	for (i = 0; i < nspecs; i++) {
2320 		r = bus_dmamap_load(virtio_dmat(vsc),
2321 		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2322 		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2323 		if (r) {
2324 			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2325 			goto err;
2326 		}
2327 		loaded++;
2329 	}
2330 
2331 	return 0;
2332 
2333 err:
2334 	for (i = 0; i < loaded; i++) {
2335 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2336 	}
2337 
2338 	return r;
2339 }
2340 
2341 static void
2342 vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2343     struct vioif_ctrl_cmdspec *specs, int nspecs)
2344 {
2345 	struct virtio_softc *vsc = sc->sc_virtio;
2346 	int i;
2347 
2348 	for (i = 0; i < nspecs; i++) {
2349 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2350 	}
2351 }
2352 
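/*
 * enqueue a control command (command header, payload buffers, status byte),
 * notify the device and sleep until vioif_ctrl_intr() marks it DONE;
 * returns 0 if the device acked with VIRTIO_NET_OK, otherwise EIO
 */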
2353 static int
2354 vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2355     struct vioif_ctrl_cmdspec *specs, int nspecs)
2356 {
2357 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2358 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2359 	struct virtio_softc *vsc = sc->sc_virtio;
2360 	int i, r, slot;
2361 
2362 	ctrlq->ctrlq_cmd->class = class;
2363 	ctrlq->ctrlq_cmd->command = cmd;
2364 
2365 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2366 	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2367 	for (i = 0; i < nspecs; i++) {
2368 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2369 		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2370 	}
2371 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2372 	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2373 
2374 	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2375 	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2376 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2377 
2378 	r = virtio_enqueue_prep(vsc, vq, &slot);
2379 	if (r != 0)
2380 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2381 	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2382 	if (r != 0)
2383 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2384 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2385 	for (i = 0; i < nspecs; i++) {
2386 		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2387 	}
2388 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2389 	virtio_enqueue_commit(vsc, vq, slot, true);
2390 
2391 	/* wait for done */
2392 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2393 	while (ctrlq->ctrlq_inuse != DONE)
2394 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2395 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2396 	/* already dequeued */
2397 
2398 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2399 	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2400 	for (i = 0; i < nspecs; i++) {
2401 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2402 		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2403 	}
2404 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2405 	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2406 
2407 	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2408 		r = 0;
2409 	else {
2410 		device_printf(sc->sc_dev, "control command failed\n");
2411 		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2412 		r = EIO;
2413 	}
2414 
2415 	return r;
2416 }
2417 
2418 /* ctrl vq interrupt; wake up the command issuer */
2419 static int
2420 vioif_ctrl_intr(void *arg)
2421 {
2422 	struct vioif_ctrlqueue *ctrlq = arg;
2423 	struct virtqueue *vq = ctrlq->ctrlq_vq;
2424 	struct virtio_softc *vsc = vq->vq_owner;
2425 	int r, slot;
2426 
2427 	if (virtio_vq_is_enqueued(vsc, vq) == false)
2428 		return 0;
2429 
2430 	r = virtio_dequeue(vsc, vq, &slot, NULL);
2431 	if (r == ENOENT)
2432 		return 0;
2433 	virtio_dequeue_commit(vsc, vq, slot);
2434 
2435 	mutex_enter(&ctrlq->ctrlq_wait_lock);
2436 	ctrlq->ctrlq_inuse = DONE;
2437 	cv_signal(&ctrlq->ctrlq_wait);
2438 	mutex_exit(&ctrlq->ctrlq_wait_lock);
2439 
2440 	return 1;
2441 }
2442 
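/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */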
2443 static int
2444 vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2445 {
2446 	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2447 	struct vioif_ctrl_cmdspec specs[1];
2448 	int r;
2449 
2450 	if (!sc->sc_has_ctrl)
2451 		return ENOTSUP;
2452 
2453 	vioif_ctrl_acquire(sc);
2454 
2455 	rx->onoff = onoff;
2456 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2457 	specs[0].buf = rx;
2458 	specs[0].bufsize = sizeof(*rx);
2459 
2460 	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2461 	    specs, __arraycount(specs));
2462 
2463 	vioif_ctrl_release(sc);
2464 	return r;
2465 }
2466 
2467 static int
2468 vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2469 {
2470 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2471 }
2472 
2473 static int
2474 vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2475 {
2476 	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2477 }
2478 
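/* tell the device how many virtqueue pairs to use (VIRTIO_NET_CTRL_MQ) */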
2479 static int
2480 vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2481 {
2482 	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2483 	struct vioif_ctrl_cmdspec specs[1];
2484 	int r;
2485 
2486 	if (!sc->sc_has_ctrl)
2487 		return ENOTSUP;
2488 
2489 	if (nvq_pairs <= 1)
2490 		return EINVAL;
2491 
2492 	vioif_ctrl_acquire(sc);
2493 
2494 	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2495 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2496 	specs[0].buf = mq;
2497 	specs[0].bufsize = sizeof(*mq);
2498 
2499 	r = vioif_ctrl_send_command(sc,
2500 	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2501 	    specs, __arraycount(specs));
2502 
2503 	vioif_ctrl_release(sc);
2504 
2505 	return r;
2506 }
2507 
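/*
 * push a changed MAC address to the device: via the control vq when
 * VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, otherwise by rewriting the
 * MAC bytes in the device config space
 */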
2508 static int
2509 vioif_set_mac_addr(struct vioif_softc *sc)
2510 {
2511 	struct virtio_net_ctrl_mac_addr *ma =
2512 	    sc->sc_ctrlq.ctrlq_mac_addr;
2513 	struct vioif_ctrl_cmdspec specs[1];
2514 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2515 	int nspecs = __arraycount(specs);
2516 	uint64_t features;
2517 	int r;
2518 	size_t i;
2519 
2520 	if (!sc->sc_has_ctrl)
2521 		return ENOTSUP;
2522 
2523 	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2524 	    ETHER_ADDR_LEN) == 0) {
2525 		return 0;
2526 	}
2527 
2528 	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2529 
2530 	features = virtio_features(sc->sc_virtio);
2531 	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2532 		vioif_ctrl_acquire(sc);
2533 
2534 		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2535 		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2536 		specs[0].buf = ma;
2537 		specs[0].bufsize = sizeof(*ma);
2538 
2539 		r = vioif_ctrl_send_command(sc,
2540 		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2541 		    specs, nspecs);
2542 
2543 		vioif_ctrl_release(sc);
2544 	} else {
2545 		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2546 			virtio_write_device_config_1(sc->sc_virtio,
2547 			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2548 		}
2549 		r = 0;
2550 	}
2551 
2552 	return r;
2553 }
2554 
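/* program the unicast/multicast MAC tables prepared in the ctrl queue */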
2555 static int
2556 vioif_set_rx_filter(struct vioif_softc *sc)
2557 {
2558 	/* filter already set in ctrlq->ctrlq_mac_tbl */
2559 	struct virtio_softc *vsc = sc->sc_virtio;
2560 	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2561 	struct vioif_ctrl_cmdspec specs[2];
2562 	int nspecs = __arraycount(specs);
2563 	int r;
2564 
2565 	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2566 	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2567 
2568 	if (!sc->sc_has_ctrl)
2569 		return ENOTSUP;
2570 
2571 	vioif_ctrl_acquire(sc);
2572 
2573 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2574 	specs[0].buf = mac_tbl_uc;
2575 	specs[0].bufsize = sizeof(*mac_tbl_uc)
2576 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2577 
2578 	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2579 	specs[1].buf = mac_tbl_mc;
2580 	specs[1].bufsize = sizeof(*mac_tbl_mc)
2581 	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2582 
2583 	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2584 	if (r != 0)
2585 		goto out;
2586 
2587 	r = vioif_ctrl_send_command(sc,
2588 	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2589 	    specs, nspecs);
2590 
2591 	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2592 
2593 out:
2594 	vioif_ctrl_release(sc);
2595 
2596 	return r;
2597 }
2598 
2599 /*
2600  * If the multicast filter is small enough (<= MAXENTRIES), program the
2601  * rx filter with the individual addresses; if the filter is too large,
2602  * or if programming it fails, fall back to ALLMULTI.
2603  */
2604 static int
2605 vioif_rx_filter(struct vioif_softc *sc)
2606 {
2607 	struct virtio_softc *vsc = sc->sc_virtio;
2608 	struct ethercom *ec = &sc->sc_ethercom;
2609 	struct ifnet *ifp = &ec->ec_if;
2610 	struct ether_multi *enm;
2611 	struct ether_multistep step;
2612 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2613 	int nentries;
2614 	bool allmulti = false;
2615 	int r;
2616 
2617 	if (!sc->sc_has_ctrl) {
2618 		goto set_ifflags;
2619 	}
2620 
2621 	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2622 	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2623 
2624 	nentries = 0;
2625 	allmulti = false;
2626 
2627 	ETHER_LOCK(ec);
2628 	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2629 	    ETHER_NEXT_MULTI(step, enm)) {
2630 		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2631 			allmulti = true;
2632 			break;
2633 		}
2634 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2635 			allmulti = true;
2636 			break;
2637 		}
2638 
2639 		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2640 		    enm->enm_addrlo, ETHER_ADDR_LEN);
2641 		nentries++;
2642 	}
2643 	ETHER_UNLOCK(ec);
2644 
2645 	r = vioif_set_mac_addr(sc);
2646 	if (r != 0) {
2647 		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2648 		    ifp->if_xname);
2649 	}
2650 
2651 	if (!allmulti) {
2652 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2653 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2654 		r = vioif_set_rx_filter(sc);
2655 		if (r != 0) {
2656 			allmulti = true; /* fallback */
2657 		}
2658 	}
2659 
2660 	if (allmulti) {
2661 		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2662 		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2663 		r = vioif_set_rx_filter(sc);
2664 		if (r != 0) {
2665 			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2666 			    ifp->if_xname);
2667 			/* what to do on failure? */
2668 		}
2669 
2670 		ifp->if_flags |= IFF_ALLMULTI;
2671 	}
2672 
2673 set_ifflags:
2674 	r = vioif_ifflags(sc);
2675 
2676 	return r;
2677 }
2678 
2679 /*
2680  * VM configuration changes
2681  */
2682 static int
2683 vioif_config_change(struct virtio_softc *vsc)
2684 {
2685 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2686 
2687 	softint_schedule(sc->sc_cfg_softint);
2688 	return 0;
2689 }
2690 
2691 static void
2692 vioif_cfg_softint(void *arg)
2693 {
2694 	struct vioif_softc *sc = arg;
2695 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2696 
2697 	vioif_update_link_status(sc);
2698 	vioif_start(ifp);
2699 }
2700 
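/* read the link state from config space; assume up without F_STATUS */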
2701 static int
2702 vioif_get_link_status(struct vioif_softc *sc)
2703 {
2704 	struct virtio_softc *vsc = sc->sc_virtio;
2705 	uint16_t status;
2706 
2707 	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2708 		status = virtio_read_device_config_2(vsc,
2709 		    VIRTIO_NET_CONFIG_STATUS);
2710 	else
2711 		status = VIRTIO_NET_S_LINK_UP;
2712 
2713 	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2714 		return LINK_STATE_UP;
2715 
2716 	return LINK_STATE_DOWN;
2717 }
2718 
2719 static void
2720 vioif_update_link_status(struct vioif_softc *sc)
2721 {
2722 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2723 	struct vioif_netqueue *netq;
2724 	struct vioif_tx_context *txc;
2725 	bool active;
2726 	int link, i;
2727 
2728 	mutex_enter(&sc->sc_lock);
2729 
2730 	link = vioif_get_link_status(sc);
2731 
2732 	if (link == sc->sc_link_state)
2733 		goto done;
2734 
2735 	sc->sc_link_state = link;
2736 
2737 	active = VIOIF_IS_LINK_ACTIVE(sc);
2738 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2739 		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2740 
2741 		mutex_enter(&netq->netq_lock);
2742 		txc = netq->netq_ctx;
2743 		txc->txc_link_active = active;
2744 		mutex_exit(&netq->netq_lock);
2745 	}
2746 
2747 	if_link_state_change(ifp, sc->sc_link_state);
2748 
2749 done:
2750 	mutex_exit(&sc->sc_lock);
2751 }
2752 
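/*
 * thin wrappers around workqueue(9); the "added" flag keeps a work item
 * from being enqueued again before its previous run has started
 */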
2753 static void
2754 vioif_workq_work(struct work *wk, void *context)
2755 {
2756 	struct vioif_work *work;
2757 
2758 	work = container_of(wk, struct vioif_work, cookie);
2759 
2760 	atomic_store_relaxed(&work->added, 0);
2761 	work->func(work->arg);
2762 }
2763 
2764 static struct workqueue *
2765 vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2766 {
2767 	struct workqueue *wq;
2768 	int error;
2769 
2770 	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2771 	    prio, ipl, flags);
2772 
2773 	if (error)
2774 		return NULL;
2775 
2776 	return wq;
2777 }
2778 
2779 static void
2780 vioif_workq_destroy(struct workqueue *wq)
2781 {
2782 
2783 	workqueue_destroy(wq);
2784 }
2785 
2786 static void
2787 vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2788 {
2789 
2790 	memset(work, 0, sizeof(*work));
2791 	work->func = func;
2792 	work->arg = arg;
2793 }
2794 
2795 static void
2796 vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2797 {
2798 
2799 	if (atomic_load_relaxed(&work->added) != 0)
2800 		return;
2801 
2802 	atomic_store_relaxed(&work->added, 1);
2803 	kpreempt_disable();
2804 	workqueue_enqueue(wq, &work->cookie, NULL);
2805 	kpreempt_enable();
2806 }
2807 
2808 static void
2809 vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2810 {
2811 
2812 	workqueue_wait(wq, &work->cookie);
2813 }
2814 
2815 MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2816 
2817 #ifdef _MODULE
2818 #include "ioconf.c"
2819 #endif
2820 
2821 static int
2822 if_vioif_modcmd(modcmd_t cmd, void *opaque)
2823 {
2824 	int error = 0;
2825 
2826 #ifdef _MODULE
2827 	switch (cmd) {
2828 	case MODULE_CMD_INIT:
2829 		error = config_init_component(cfdriver_ioconf_if_vioif,
2830 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2831 		break;
2832 	case MODULE_CMD_FINI:
2833 		error = config_fini_component(cfdriver_ioconf_if_vioif,
2834 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2835 		break;
2836 	default:
2837 		error = ENOTTY;
2838 		break;
2839 	}
2840 #endif
2841 
2842 	return error;
2843 }
2844