xref: /netbsd-src/sys/dev/pci/if_vioif.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /*	$NetBSD: if_vioif.c,v 1.49 2019/05/23 13:10:52 msaitoh Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Minoura Makoto.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.49 2019/05/23 13:10:52 msaitoh Exp $");
30 
31 #ifdef _KERNEL_OPT
32 #include "opt_net_mpsafe.h"
33 #endif
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/condvar.h>
40 #include <sys/device.h>
41 #include <sys/intr.h>
42 #include <sys/kmem.h>
43 #include <sys/mbuf.h>
44 #include <sys/mutex.h>
45 #include <sys/sockio.h>
46 #include <sys/cpu.h>
47 #include <sys/module.h>
48 #include <sys/pcq.h>
49 
50 #include <dev/pci/virtioreg.h>
51 #include <dev/pci/virtiovar.h>
52 
53 #include <net/if.h>
54 #include <net/if_media.h>
55 #include <net/if_ether.h>
56 
57 #include <net/bpf.h>
58 
59 #include "ioconf.h"
60 
61 #ifdef NET_MPSAFE
62 #define VIOIF_MPSAFE	1
63 #define VIOIF_MULTIQ	1
64 #endif
65 
66 #ifdef SOFTINT_INTR
67 #define VIOIF_SOFTINT_INTR	1
68 #endif
69 
70 /*
71  * if_vioifreg.h:
72  */
73 /* Configuration registers */
74 #define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
75 #define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
76 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	8 /* 16bit */
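
/*
 * Reading the config space goes through accessors provided by the
 * virtio core; a minimal sketch (this exact pattern appears in
 * vioif_is_link_up() below):
 *
 *	uint16_t status;
 *
 *	status = virtio_read_device_config_2(vsc,
 *	    VIRTIO_NET_CONFIG_STATUS);
 *	if (status & VIRTIO_NET_S_LINK_UP)
 *		... link is up ...
 */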
77 
78 /* Feature bits */
79 #define VIRTIO_NET_F_CSUM		__BIT(0)
80 #define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
81 #define VIRTIO_NET_F_MAC		__BIT(5)
82 #define VIRTIO_NET_F_GSO		__BIT(6)
83 #define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
84 #define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
85 #define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
86 #define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
87 #define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
88 #define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
89 #define VIRTIO_NET_F_HOST_ECN		__BIT(13)
90 #define VIRTIO_NET_F_HOST_UFO		__BIT(14)
91 #define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
92 #define VIRTIO_NET_F_STATUS		__BIT(16)
93 #define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
94 #define VIRTIO_NET_F_CTRL_RX		__BIT(18)
95 #define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
96 #define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
97 #define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
98 #define VIRTIO_NET_F_MQ			__BIT(22)
99 
100 #define VIRTIO_NET_FLAG_BITS \
101 	VIRTIO_COMMON_FLAG_BITS \
102 	"\x17""MQ" \
103 	"\x16""GUEST_ANNOUNCE" \
104 	"\x15""CTRL_RX_EXTRA" \
105 	"\x14""CTRL_VLAN" \
106 	"\x13""CTRL_RX" \
107 	"\x12""CTRL_VQ" \
108 	"\x11""STATUS" \
109 	"\x10""MRG_RXBUF" \
110 	"\x0f""HOST_UFO" \
111 	"\x0e""HOST_ECN" \
112 	"\x0d""HOST_TSO6" \
113 	"\x0c""HOST_TSO4" \
114 	"\x0b""GUEST_UFO" \
115 	"\x0a""GUEST_ECN" \
116 	"\x09""GUEST_TSO6" \
117 	"\x08""GUEST_TSO4" \
118 	"\x07""GSO" \
119 	"\x06""MAC" \
120 	"\x02""GUEST_CSUM" \
121 	"\x01""CSUM"
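
/*
 * VIRTIO_NET_FLAG_BITS is an snprintb(9) bit-format string: each
 * "\xNN" is a 1-origin bit position followed by that bit's name.  It
 * is handed to virtio_child_attach_start() below so the negotiated
 * features can be pretty-printed.  A sketch of how such a string is
 * consumed (buffer name illustrative; the exact rendering depends on
 * the number-base byte supplied by VIRTIO_COMMON_FLAG_BITS):
 *
 *	char buf[256];
 *
 *	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS,
 *	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS);
 *	... buf now holds something like "0x10020<MAC,STATUS>" ...
 */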
122 
123 /* Status */
124 #define VIRTIO_NET_S_LINK_UP	1
125 
126 /* Packet header structure */
127 struct virtio_net_hdr {
128 	uint8_t		flags;
129 	uint8_t		gso_type;
130 	uint16_t	hdr_len;
131 	uint16_t	gso_size;
132 	uint16_t	csum_start;
133 	uint16_t	csum_offset;
134 #if 0
135 	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
136 #endif
137 } __packed;
138 
139 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
140 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
141 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
142 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
143 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
144 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
145 
146 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
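
/*
 * Note: none of the CSUM/GSO offload features above are negotiated by
 * this driver (see req_features in vioif_attach()), so every tx header
 * it produces is all zeroes (flags == 0, gso_type ==
 * VIRTIO_NET_HDR_GSO_NONE); see the memset() in
 * vioif_send_common_locked().
 */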
147 
148 /* Control virtqueue */
149 struct virtio_net_ctrl_cmd {
150 	uint8_t	class;
151 	uint8_t	command;
152 } __packed;
153 #define VIRTIO_NET_CTRL_RX		0
154 # define VIRTIO_NET_CTRL_RX_PROMISC	0
155 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
156 
157 #define VIRTIO_NET_CTRL_MAC		1
158 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
159 
160 #define VIRTIO_NET_CTRL_VLAN		2
161 # define VIRTIO_NET_CTRL_VLAN_ADD	0
162 # define VIRTIO_NET_CTRL_VLAN_DEL	1
163 
164 #define VIRTIO_NET_CTRL_MQ			4
165 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
166 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
167 # define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
168 
169 struct virtio_net_ctrl_status {
170 	uint8_t	ack;
171 } __packed;
172 #define VIRTIO_NET_OK			0
173 #define VIRTIO_NET_ERR			1
174 
175 struct virtio_net_ctrl_rx {
176 	uint8_t	onoff;
177 } __packed;
178 
179 struct virtio_net_ctrl_mac_tbl {
180 	uint32_t nentries;
181 	uint8_t macs[][ETHER_ADDR_LEN];
182 } __packed;
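
/*
 * macs[] is a C99 flexible array member; a table holding n addresses
 * is sized the way vioif_set_rx_filter() does it:
 *
 *	bufsize = sizeof(struct virtio_net_ctrl_mac_tbl)
 *	    + n * ETHER_ADDR_LEN;
 */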
183 
184 struct virtio_net_ctrl_vlan {
185 	uint16_t id;
186 } __packed;
187 
188 struct virtio_net_ctrl_mq {
189 	uint16_t virtqueue_pairs;
190 } __packed;
191 
192 struct vioif_ctrl_cmdspec {
193 	bus_dmamap_t	dmamap;
194 	void		*buf;
195 	bus_size_t	bufsize;
196 };
197 
198 /*
199  * if_vioifvar.h:
200  */
201 
202 /*
203  * Locking notes:
204  * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
205  *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
206  *      - only one of these locks may be held at once
207  * + ctrlq_inuse is protected by ctrlq_wait_lock.
208  *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
209  *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
210  */
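
/*
 * Illustration of the last rule: the following nesting is forbidden,
 *
 *	mutex_enter(txq->txq_lock);
 *	mutex_enter(&ctrlq->ctrlq_wait_lock);	... never do this ...
 *
 * so control commands are only issued from contexts holding neither
 * txq_lock nor rxq_lock (e.g. vioif_rx_filter() via ioctl/init).
 */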
211 
212 struct vioif_txqueue {
213 	kmutex_t		*txq_lock;	/* lock for tx operations */
214 
215 	struct virtqueue	*txq_vq;
216 	bool			txq_stopping;
217 	bool			txq_link_active;
218 	pcq_t			*txq_intrq;
219 
220 	struct virtio_net_hdr	*txq_hdrs;
221 	bus_dmamap_t		*txq_hdr_dmamaps;
222 
223 	struct mbuf		**txq_mbufs;
224 	bus_dmamap_t		*txq_dmamaps;
225 
226 	void			*txq_deferred_transmit;
227 };
228 
229 struct vioif_rxqueue {
230 	kmutex_t		*rxq_lock;	/* lock for rx operations */
231 
232 	struct virtqueue	*rxq_vq;
233 	bool			rxq_stopping;
234 
235 	struct virtio_net_hdr	*rxq_hdrs;
236 	bus_dmamap_t		*rxq_hdr_dmamaps;
237 
238 	struct mbuf		**rxq_mbufs;
239 	bus_dmamap_t		*rxq_dmamaps;
240 
241 	void			*rxq_softint;
242 };
243 
244 struct vioif_ctrlqueue {
245 	struct virtqueue		*ctrlq_vq;
246 	enum {
247 		FREE, INUSE, DONE
248 	}				ctrlq_inuse;
249 	kcondvar_t			ctrlq_wait;
250 	kmutex_t			ctrlq_wait_lock;
251 	struct lwp			*ctrlq_owner;
252 
253 	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
254 	struct virtio_net_ctrl_status	*ctrlq_status;
255 	struct virtio_net_ctrl_rx	*ctrlq_rx;
256 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
257 	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
258 	struct virtio_net_ctrl_mq	*ctrlq_mq;
259 
260 	bus_dmamap_t			ctrlq_cmd_dmamap;
261 	bus_dmamap_t			ctrlq_status_dmamap;
262 	bus_dmamap_t			ctrlq_rx_dmamap;
263 	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
264 	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
265 	bus_dmamap_t			ctrlq_mq_dmamap;
266 };
267 
268 struct vioif_softc {
269 	device_t		sc_dev;
270 
271 	struct virtio_softc	*sc_virtio;
272 	struct virtqueue	*sc_vqs;
273 
274 	int			sc_max_nvq_pairs;
275 	int			sc_req_nvq_pairs;
276 	int			sc_act_nvq_pairs;
277 
278 	uint8_t			sc_mac[ETHER_ADDR_LEN];
279 	struct ethercom		sc_ethercom;
280 	short			sc_deferred_init_done;
281 	bool			sc_link_active;
282 
283 	struct vioif_txqueue	*sc_txq;
284 	struct vioif_rxqueue	*sc_rxq;
285 
286 	bool			sc_has_ctrl;
287 	struct vioif_ctrlqueue	sc_ctrlq;
288 
289 	bus_dma_segment_t	sc_hdr_segs[1];
290 	void			*sc_dmamem;
291 	void			*sc_kmem;
292 
293 	void			*sc_ctl_softint;
294 };
295 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
296 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
297 
298 /* cfattach interface functions */
299 static int	vioif_match(device_t, cfdata_t, void *);
300 static void	vioif_attach(device_t, device_t, void *);
301 static void	vioif_deferred_init(device_t);
302 
303 /* ifnet interface functions */
304 static int	vioif_init(struct ifnet *);
305 static void	vioif_stop(struct ifnet *, int);
306 static void	vioif_start(struct ifnet *);
307 static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
308 static int	vioif_transmit(struct ifnet *, struct mbuf *);
309 static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
310 static int	vioif_ioctl(struct ifnet *, u_long, void *);
311 static void	vioif_watchdog(struct ifnet *);
312 
313 /* rx */
314 static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
315 static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
316 static void	vioif_populate_rx_mbufs(struct vioif_rxqueue *);
317 static void	vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *);
318 static int	vioif_rx_deq(struct vioif_rxqueue *);
319 static int	vioif_rx_deq_locked(struct vioif_rxqueue *);
320 static int	vioif_rx_vq_done(struct virtqueue *);
321 static void	vioif_rx_softint(void *);
322 static void	vioif_rx_drain(struct vioif_rxqueue *);
323 
324 /* tx */
325 static int	vioif_tx_vq_done(struct virtqueue *);
326 static int	vioif_tx_vq_done_locked(struct virtqueue *);
327 static void	vioif_tx_drain(struct vioif_txqueue *);
328 static void	vioif_deferred_transmit(void *);
329 
330 /* other control */
331 static bool	vioif_is_link_up(struct vioif_softc *);
332 static void	vioif_update_link_status(struct vioif_softc *);
333 static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
334 static int	vioif_set_promisc(struct vioif_softc *, bool);
335 static int	vioif_set_allmulti(struct vioif_softc *, bool);
336 static int	vioif_set_rx_filter(struct vioif_softc *);
337 static int	vioif_rx_filter(struct vioif_softc *);
338 static int	vioif_ctrl_vq_done(struct virtqueue *);
339 static int	vioif_config_change(struct virtio_softc *);
340 static void	vioif_ctl_softint(void *);
341 static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
342 static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
343 static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
344 
345 CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
346 		  vioif_match, vioif_attach, NULL, NULL);
347 
348 static int
349 vioif_match(device_t parent, cfdata_t match, void *aux)
350 {
351 	struct virtio_attach_args *va = aux;
352 
353 	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
354 		return 1;
355 
356 	return 0;
357 }
358 
359 static int
360 vioif_alloc_queues(struct vioif_softc *sc)
361 {
362 	int nvq_pairs = sc->sc_max_nvq_pairs;
363 	int nvqs = nvq_pairs * 2;
364 	int i;
365 
366 	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
367 
368 	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
369 	    KM_NOSLEEP);
370 	if (sc->sc_rxq == NULL)
371 		return -1;
372 
373 	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
374 	    KM_NOSLEEP);
375 	if (sc->sc_txq == NULL)
376 		return -1;
377 
378 	if (sc->sc_has_ctrl)
379 		nvqs++;
380 
381 	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_NOSLEEP);
382 	if (sc->sc_vqs == NULL)
383 		return -1;
384 
385 	nvqs = 0;
386 	for (i = 0; i < nvq_pairs; i++) {
387 		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
388 		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
389 	}
390 
391 	if (sc->sc_has_ctrl)
392 		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
393 
394 	return 0;
395 }
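
/*
 * Resulting sc_vqs[] layout for N queue pairs (from the loop above):
 *
 *	sc_vqs[0] = rx0, sc_vqs[1] = tx0,
 *	sc_vqs[2] = rx1, sc_vqs[3] = tx1, ...
 *	sc_vqs[2N] = ctrl (only when the control queue is present)
 */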
396 
397 static void
398 vioif_free_queues(struct vioif_softc *sc)
399 {
400 	int nvq_pairs = sc->sc_max_nvq_pairs;
401 	int nvqs = nvq_pairs * 2;
402 
403 	if (sc->sc_ctrlq.ctrlq_vq)
404 		nvqs++;
405 
406 	if (sc->sc_txq) {
407 		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
408 		sc->sc_txq = NULL;
409 	}
410 
411 	if (sc->sc_rxq) {
412 		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
413 		sc->sc_rxq = NULL;
414 	}
415 
416 	if (sc->sc_vqs) {
417 		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
418 		sc->sc_vqs = NULL;
419 	}
420 }
421 
422 /* allocate memory */
423 /*
424  * dma memory is used for:
425  *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
426  *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
427  *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
428  *   ctrlq_status:	 return value for a command via ctrl vq (READ)
429  *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
430  *			 (WRITE)
431  *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
432  *			 class command (WRITE)
433  *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
434  *			 class command (WRITE)
435  * Only one instance of each ctrlq_* structure is allocated; they are
436  * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
437  */
438 /*
439  * dynamically allocated memory is used for:
440  *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
441  *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
442  *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
443  *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
444  *   rxq_mbufs[slot]:		mbuf pointer array for received frames
445  *   txq_mbufs[slot]:		mbuf pointer array for sent frames
446  */
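/*
 * For example, with one queue pair of 256 descriptors each and the
 * control queue present (the "256" is purely illustrative), the DMA
 * allocation below comes to
 *
 *	2 * 256 * sizeof(struct virtio_net_hdr)
 *	+ sizeof(struct virtio_net_ctrl_cmd)
 *	+ sizeof(struct virtio_net_ctrl_status)
 *	+ sizeof(struct virtio_net_ctrl_rx)
 *	+ 2 * sizeof(struct virtio_net_ctrl_mac_tbl)
 *	+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES
 *	+ sizeof(struct virtio_net_ctrl_mq)
 *
 * bytes, carved up by the P() macro in vioif_alloc_mems().
 */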
447 static int
448 vioif_alloc_mems(struct vioif_softc *sc)
449 {
450 	struct virtio_softc *vsc = sc->sc_virtio;
451 	struct vioif_txqueue *txq;
452 	struct vioif_rxqueue *rxq;
453 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
454 	int allocsize, allocsize2, r, rsegs, i, qid;
455 	void *vaddr;
456 	intptr_t p;
457 
458 	allocsize = 0;
459 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
460 		rxq = &sc->sc_rxq[qid];
461 		txq = &sc->sc_txq[qid];
462 
463 		allocsize +=
464 		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num;
465 		allocsize +=
466 		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num;
467 	}
468 	if (sc->sc_has_ctrl) {
469 		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
470 		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
471 		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
472 		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
473 			+ sizeof(struct virtio_net_ctrl_mac_tbl)
474 			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
475 		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
476 	}
477 	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
478 			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
479 	if (r != 0) {
480 		aprint_error_dev(sc->sc_dev,
481 				 "DMA memory allocation failed, size %d, "
482 				 "error code %d\n", allocsize, r);
483 		goto err_none;
484 	}
485 	r = bus_dmamem_map(virtio_dmat(vsc),
486 			   &sc->sc_hdr_segs[0], 1, allocsize,
487 			   &vaddr, BUS_DMA_NOWAIT);
488 	if (r != 0) {
489 		aprint_error_dev(sc->sc_dev,
490 				 "DMA memory map failed, "
491 				 "error code %d\n", r);
492 		goto err_dmamem_alloc;
493 	}
494 
495 #define P(p, p0, p0size)	do { p0 = (void *) p;		\
496 				     p += p0size; } while (0)
497 	memset(vaddr, 0, allocsize);
498 	sc->sc_dmamem = vaddr;
499 	p = (intptr_t) vaddr;
500 
501 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
502 		rxq = &sc->sc_rxq[qid];
503 		txq = &sc->sc_txq[qid];
504 
505 		P(p, rxq->rxq_hdrs,
506 		    sizeof(rxq->rxq_hdrs[0]) * rxq->rxq_vq->vq_num);
507 		P(p, txq->txq_hdrs,
508 		    sizeof(txq->txq_hdrs[0]) * txq->txq_vq->vq_num);
509 	}
510 	if (sc->sc_has_ctrl) {
511 		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
512 		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
513 		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
514 		P(p, ctrlq->ctrlq_mac_tbl_uc,
515 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0);
516 		P(p, ctrlq->ctrlq_mac_tbl_mc,
517 		    (sizeof(*ctrlq->ctrlq_mac_tbl_mc)
518 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
519 		P(p, ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq));
520 	}
521 
522 	allocsize2 = 0;
523 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
524 		int rxqsize, txqsize;
525 
526 		rxq = &sc->sc_rxq[qid];
527 		txq = &sc->sc_txq[qid];
528 		rxqsize = rxq->rxq_vq->vq_num;
529 		txqsize = txq->txq_vq->vq_num;
530 
531 		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
532 		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
533 		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
534 
535 		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
536 		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
537 		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
538 	}
539 	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
540 	sc->sc_kmem = vaddr;
541 	p = (intptr_t) vaddr;
542 
543 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
544 		int rxqsize, txqsize;
545 		rxq = &sc->sc_rxq[qid];
546 		txq = &sc->sc_txq[qid];
547 		rxqsize = rxq->rxq_vq->vq_num;
548 		txqsize = txq->txq_vq->vq_num;
549 
550 		P(p, rxq->rxq_hdr_dmamaps,
551 		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
552 		P(p, txq->txq_hdr_dmamaps,
553 		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
554 		P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
555 		P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
556 		P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
557 		P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
558 	}
559 #undef P
560 
561 #define C(map, size, nsegs, usage)					      \
562 	do {								      \
563 		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0, \
564 		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,			      \
565 		    &map);						      \
566 		if (r != 0) {						      \
567 			aprint_error_dev(sc->sc_dev,			      \
568 			    "%s dmamap creation failed, "		      \
569 			    "error code %d\n", usage, r);		      \
570 			goto err_reqs;					      \
571 		}							      \
572 	} while (0)
573 #define C_L(map, buf, size, nsegs, rw, usage)				\
574 	C(map, size, nsegs, usage);					\
575 	do {								\
576 		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
577 				    buf, size, NULL,			\
578 				    rw | BUS_DMA_NOWAIT);		\
579 		if (r != 0) {						\
580 			aprint_error_dev(sc->sc_dev,			\
581 					 usage " dmamap load failed, "	\
582 					 "error code %d\n", r);		\
583 			goto err_reqs;					\
584 		}							\
585 	} while (0)
586 
587 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
588 		rxq = &sc->sc_rxq[qid];
589 		txq = &sc->sc_txq[qid];
590 
591 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
592 			C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i],
593 			    sizeof(rxq->rxq_hdrs[0]), 1,
594 			    BUS_DMA_READ, "rx header");
595 			C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
596 		}
597 
598 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
599 			C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i],
600 			    sizeof(txq->txq_hdrs[0]), 1,
601 			    BUS_DMA_WRITE, "tx header");
602 			C(txq->txq_dmamaps[i], ETHER_MAX_LEN,
603 			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
604 		}
605 	}
606 
607 	if (sc->sc_has_ctrl) {
608 		/* control vq class & command */
609 		C_L(ctrlq->ctrlq_cmd_dmamap,
610 		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
611 		    BUS_DMA_WRITE, "control command");
612 		C_L(ctrlq->ctrlq_status_dmamap,
613 		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
614 		    BUS_DMA_READ, "control status");
615 
616 		/* control vq rx mode command parameter */
617 		C_L(ctrlq->ctrlq_rx_dmamap,
618 		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
619 		    BUS_DMA_WRITE, "rx mode control command");
620 
621 		/* multiqueue set command */
622 		C_L(ctrlq->ctrlq_mq_dmamap,
623 		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
624 		    BUS_DMA_WRITE, "multiqueue set command");
625 
626 		/* control vq MAC filter table for unicast */
627 		/* do not load now since its length is variable */
628 		C(ctrlq->ctrlq_tbl_uc_dmamap,
629 		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
630 		    "unicast MAC address filter command");
631 
632 		/* control vq MAC filter table for multicast */
633 		C(ctrlq->ctrlq_tbl_mc_dmamap,
634 		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
635 		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
636 		    "multicast MAC address filter command");
637 	}
638 #undef C_L
639 #undef C
640 
641 	return 0;
642 
643 err_reqs:
644 #define D(map)								\
645 	do {								\
646 		if (map) {						\
647 			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
648 			map = NULL;					\
649 		}							\
650 	} while (0)
651 	D(ctrlq->ctrlq_tbl_mc_dmamap);
652 	D(ctrlq->ctrlq_tbl_uc_dmamap);
653 	D(ctrlq->ctrlq_rx_dmamap);
654 	D(ctrlq->ctrlq_status_dmamap);
655 	D(ctrlq->ctrlq_cmd_dmamap);
656 	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
657 		rxq = &sc->sc_rxq[qid];
658 		txq = &sc->sc_txq[qid];
659 
660 		for (i = 0; i < txq->txq_vq->vq_num; i++) {
661 			D(txq->txq_dmamaps[i]);
662 			D(txq->txq_hdr_dmamaps[i]);
663 		}
664 		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
665 			D(rxq->rxq_dmamaps[i]);
666 			D(rxq->rxq_hdr_dmamaps[i]);
667 		}
668 	}
669 #undef D
670 	if (sc->sc_kmem) {
671 		kmem_free(sc->sc_kmem, allocsize2);
672 		sc->sc_kmem = NULL;
673 	}
674 	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
675 err_dmamem_alloc:
676 	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
677 err_none:
678 	return -1;
679 }
680 
681 static void
682 vioif_attach(device_t parent, device_t self, void *aux)
683 {
684 	struct vioif_softc *sc = device_private(self);
685 	struct virtio_softc *vsc = device_private(parent);
686 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
687 	struct vioif_txqueue *txq;
688 	struct vioif_rxqueue *rxq;
689 	uint32_t features, req_features;
690 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
691 	u_int softint_flags;
692 	int r, i, nvqs=0, req_flags;
693 
694 	if (virtio_child(vsc) != NULL) {
695 		aprint_normal(": child already attached for %s; "
696 			      "something is wrong...\n",
697 			      device_xname(parent));
698 		return;
699 	}
700 
701 	sc->sc_dev = self;
702 	sc->sc_virtio = vsc;
703 	sc->sc_link_active = false;
704 
705 	sc->sc_max_nvq_pairs = 1;
706 	sc->sc_req_nvq_pairs = 1;
707 	sc->sc_act_nvq_pairs = 1;
708 
709 	req_flags = 0;
710 
711 #ifdef VIOIF_MPSAFE
712 	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
713 #endif
714 #ifdef VIOIF_SOFTINT_INTR
715 	req_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
716 #endif
717 	req_flags |= VIRTIO_F_PCI_INTR_MSIX;
718 
719 	req_features =
720 	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
721 	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
722 #ifdef VIOIF_MULTIQ
723 	req_features |= VIRTIO_NET_F_MQ;
724 #endif
725 	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
726 	    vioif_config_change, virtio_vq_intr, req_flags,
727 	    req_features, VIRTIO_NET_FLAG_BITS);
728 
729 	features = virtio_features(vsc);
730 
731 	if (features & VIRTIO_NET_F_MAC) {
732 		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
733 						    VIRTIO_NET_CONFIG_MAC+0);
734 		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
735 						    VIRTIO_NET_CONFIG_MAC+1);
736 		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
737 						    VIRTIO_NET_CONFIG_MAC+2);
738 		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
739 						    VIRTIO_NET_CONFIG_MAC+3);
740 		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
741 						    VIRTIO_NET_CONFIG_MAC+4);
742 		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
743 						    VIRTIO_NET_CONFIG_MAC+5);
744 	} else {
745 		/* code stolen from sys/net/if_tap.c */
746 		struct timeval tv;
747 		uint32_t ui;
748 		getmicrouptime(&tv);
749 		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
750 		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
751 		virtio_write_device_config_1(vsc,
752 					     VIRTIO_NET_CONFIG_MAC+0,
753 					     sc->sc_mac[0]);
754 		virtio_write_device_config_1(vsc,
755 					     VIRTIO_NET_CONFIG_MAC+1,
756 					     sc->sc_mac[1]);
757 		virtio_write_device_config_1(vsc,
758 					     VIRTIO_NET_CONFIG_MAC+2,
759 					     sc->sc_mac[2]);
760 		virtio_write_device_config_1(vsc,
761 					     VIRTIO_NET_CONFIG_MAC+3,
762 					     sc->sc_mac[3]);
763 		virtio_write_device_config_1(vsc,
764 					     VIRTIO_NET_CONFIG_MAC+4,
765 					     sc->sc_mac[4]);
766 		virtio_write_device_config_1(vsc,
767 					     VIRTIO_NET_CONFIG_MAC+5,
768 					     sc->sc_mac[5]);
769 	}
770 
771 	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(sc->sc_mac));
772 
773 	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
774 	    (features & VIRTIO_NET_F_CTRL_RX)) {
775 		sc->sc_has_ctrl = true;
776 
777 		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
778 		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
779 		ctrlq->ctrlq_inuse = FREE;
780 	} else {
781 		sc->sc_has_ctrl = false;
782 	}
783 
784 	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
785 		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
786 		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
787 
788 		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
789 			goto err;
790 
791 		/* Limit the number of queue pairs to use */
792 		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
793 	}
794 
795 	r = vioif_alloc_queues(sc);
796 	if (r != 0)
797 		goto err;
798 
799 	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
800 
801 #ifdef VIOIF_MPSAFE
802 	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
803 #else
804 	softint_flags = SOFTINT_NET;
805 #endif
806 
807 	/*
808 	 * Allocate the virtqueues.
809 	 */
810 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
811 		rxq = &sc->sc_rxq[i];
812 		txq = &sc->sc_txq[i];
813 		char qname[32];
814 
815 		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
816 
817 		rxq->rxq_softint = softint_establish(softint_flags, vioif_rx_softint, rxq);
818 		if (rxq->rxq_softint == NULL) {
819 			aprint_error_dev(self, "cannot establish rx softint\n");
820 			goto err;
821 		}
822 		snprintf(qname, sizeof(qname), "rx%d", i);
823 		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
824 		    MCLBYTES+sizeof(struct virtio_net_hdr), 2, qname);
825 		if (r != 0)
826 			goto err;
827 		nvqs++;
828 		rxq->rxq_vq->vq_done = vioif_rx_vq_done;
829 		rxq->rxq_vq->vq_done_ctx = (void *)rxq;
830 		rxq->rxq_stopping = true;
831 
832 		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
833 		txq->txq_deferred_transmit = softint_establish(softint_flags,
834 		    vioif_deferred_transmit, txq);
835 		if (txq->txq_deferred_transmit == NULL) {
836 			aprint_error_dev(self, "cannot establish tx softint\n");
837 			goto err;
838 		}
839 		snprintf(qname, sizeof(qname), "tx%d", i);
840 		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
841 		    (sizeof(struct virtio_net_hdr)
842 			+ (ETHER_MAX_LEN - ETHER_HDR_LEN)),
843 		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
844 		if (r != 0)
845 			goto err;
846 		nvqs++;
847 		txq->txq_vq->vq_done = vioif_tx_vq_done;
848 		txq->txq_vq->vq_done_ctx = (void *)txq;
849 		txq->txq_link_active = sc->sc_link_active;
850 		txq->txq_stopping = false;
851 		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_NOSLEEP);
852 		if (txq->txq_intrq == NULL)
853 			goto err;
854 	}
855 
856 	if (sc->sc_has_ctrl) {
857 		/*
858 		 * Allocate a virtqueue for the control channel.
859 		 */
860 		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
861 		    NBPG, 1, "control");
862 		if (r != 0) {
863 			aprint_error_dev(self, "failed to allocate "
864 			    "a virtqueue for control channel\n");
865 
866 			sc->sc_has_ctrl = false;
867 			cv_destroy(&ctrlq->ctrlq_wait);
868 			mutex_destroy(&ctrlq->ctrlq_wait_lock);
869 		} else {
870 			nvqs++;
871 			ctrlq->ctrlq_vq->vq_done = vioif_ctrl_vq_done;
872 			ctrlq->ctrlq_vq->vq_done_ctx = (void *) ctrlq;
873 		}
874 	}
875 
876 	sc->sc_ctl_softint = softint_establish(softint_flags,
877 	    vioif_ctl_softint, sc);
878 	if (sc->sc_ctl_softint == NULL) {
879 		aprint_error_dev(self, "cannot establish ctl softint\n");
880 		goto err;
881 	}
882 
883 	if (vioif_alloc_mems(sc) < 0)
884 		goto err;
885 
886 	if (virtio_child_attach_finish(vsc) != 0)
887 		goto err;
888 
889 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
890 	ifp->if_softc = sc;
891 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
892 #ifdef VIOIF_MPSAFE
893 	ifp->if_extflags = IFEF_MPSAFE;
894 #endif
895 	ifp->if_start = vioif_start;
896 	if (sc->sc_req_nvq_pairs > 1)
897 		ifp->if_transmit = vioif_transmit;
898 	ifp->if_ioctl = vioif_ioctl;
899 	ifp->if_init = vioif_init;
900 	ifp->if_stop = vioif_stop;
901 	ifp->if_capabilities = 0;
902 	ifp->if_watchdog = vioif_watchdog;
903 	txq = &sc->sc_txq[0];
904 	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
905 	IFQ_SET_READY(&ifp->if_snd);
906 
907 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
908 
909 	if_attach(ifp);
910 	if_deferred_start_init(ifp, NULL);
911 	ether_ifattach(ifp, sc->sc_mac);
912 
913 	return;
914 
915 err:
916 	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
917 		rxq = &sc->sc_rxq[i];
918 		txq = &sc->sc_txq[i];
919 
920 		if (rxq->rxq_lock) {
921 			mutex_obj_free(rxq->rxq_lock);
922 			rxq->rxq_lock = NULL;
923 		}
924 
925 		if (rxq->rxq_softint) {
926 			softint_disestablish(rxq->rxq_softint);
927 			rxq->rxq_softint = NULL;
928 		}
929 
930 		if (txq->txq_lock) {
931 			mutex_obj_free(txq->txq_lock);
932 			txq->txq_lock = NULL;
933 		}
934 
935 		if (txq->txq_deferred_transmit) {
936 			softint_disestablish(txq->txq_deferred_transmit);
937 			txq->txq_deferred_transmit = NULL;
938 		}
939 
940 		if (txq->txq_intrq) {
941 			pcq_destroy(txq->txq_intrq);
942 			txq->txq_intrq = NULL;
943 		}
944 	}
945 
946 	if (sc->sc_has_ctrl) {
947 		cv_destroy(&ctrlq->ctrlq_wait);
948 		mutex_destroy(&ctrlq->ctrlq_wait_lock);
949 	}
950 
951 	while (nvqs > 0)
952 		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
953 
954 	vioif_free_queues(sc);
955 
956 	virtio_child_attach_failed(vsc);
957 	return;
958 }
959 
960 /* we need interrupts to turn promiscuous mode off */
961 static void
962 vioif_deferred_init(device_t self)
963 {
964 	struct vioif_softc *sc = device_private(self);
965 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
966 	int r;
967 
968 	if (ifp->if_flags & IFF_PROMISC)
969 		return;
970 
971 	r =  vioif_set_promisc(sc, false);
972 	if (r != 0)
973 		aprint_error_dev(self, "resetting promisc mode failed, "
974 				 "error code %d\n", r);
975 }
976 
977 static void
978 vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
979 {
980 	struct virtio_softc *vsc = sc->sc_virtio;
981 	struct vioif_txqueue *txq;
982 	struct vioif_rxqueue *rxq;
983 	int i;
984 
985 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
986 		txq = &sc->sc_txq[i];
987 		rxq = &sc->sc_rxq[i];
988 
989 		virtio_start_vq_intr(vsc, txq->txq_vq);
990 		virtio_start_vq_intr(vsc, rxq->rxq_vq);
991 	}
992 }
993 
994 static void
995 vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
996 {
997 	struct virtio_softc *vsc = sc->sc_virtio;
998 	struct vioif_txqueue *txq;
999 	struct vioif_rxqueue *rxq;
1000 	int i;
1001 
1002 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1003 		txq = &sc->sc_txq[i];
1004 		rxq = &sc->sc_rxq[i];
1005 
1006 		virtio_stop_vq_intr(vsc, txq->txq_vq);
1007 		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1008 	}
1009 }
1010 
1011 /*
1012  * Interface functions for ifnet
1013  */
1014 static int
1015 vioif_init(struct ifnet *ifp)
1016 {
1017 	struct vioif_softc *sc = ifp->if_softc;
1018 	struct virtio_softc *vsc = sc->sc_virtio;
1019 	struct vioif_rxqueue *rxq;
1020 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1021 	int r, i;
1022 
1023 	vioif_stop(ifp, 0);
1024 
1025 	virtio_reinit_start(vsc);
1026 	virtio_negotiate_features(vsc, virtio_features(vsc));
1027 
1028 	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1029 		rxq = &sc->sc_rxq[i];
1030 
1031 		/* Must clear rxq_stopping before calling vioif_populate_rx_mbufs */
1032 		rxq->rxq_stopping = false;
1033 		vioif_populate_rx_mbufs(rxq);
1034 	}
1035 
1036 	virtio_reinit_end(vsc);
1037 
1038 	if (sc->sc_has_ctrl)
1039 		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1040 
1041 	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1042 	if (r == 0)
1043 		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1044 	else
1045 		sc->sc_act_nvq_pairs = 1;
1046 
1047 	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1048 		sc->sc_txq[i].txq_stopping = false;
1049 
1050 	vioif_enable_interrupt_vqpairs(sc);
1051 
1052 	if (!sc->sc_deferred_init_done) {
1053 		sc->sc_deferred_init_done = 1;
1054 		if (sc->sc_has_ctrl)
1055 			vioif_deferred_init(sc->sc_dev);
1056 	}
1057 
1058 	vioif_update_link_status(sc);
1059 	ifp->if_flags |= IFF_RUNNING;
1060 	ifp->if_flags &= ~IFF_OACTIVE;
1061 	vioif_rx_filter(sc);
1062 
1063 	return 0;
1064 }
1065 
1066 static void
1067 vioif_stop(struct ifnet *ifp, int disable)
1068 {
1069 	struct vioif_softc *sc = ifp->if_softc;
1070 	struct virtio_softc *vsc = sc->sc_virtio;
1071 	struct vioif_txqueue *txq;
1072 	struct vioif_rxqueue *rxq;
1073 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1074 	int i;
1075 
1076 	/* Take the locks to ensure that ongoing TX/RX finish */
1077 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1078 		txq = &sc->sc_txq[i];
1079 		rxq = &sc->sc_rxq[i];
1080 
1081 		mutex_enter(txq->txq_lock);
1082 		txq->txq_stopping = true;
1083 		mutex_exit(txq->txq_lock);
1084 
1085 		mutex_enter(rxq->rxq_lock);
1086 		rxq->rxq_stopping = true;
1087 		mutex_exit(rxq->rxq_lock);
1088 	}
1089 
1090 	/* disable interrupts */
1091 	vioif_disable_interrupt_vqpairs(sc);
1092 
1093 	if (sc->sc_has_ctrl)
1094 		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1095 
1096 	/* the only way to stop I/O and DMA is to reset the device */
1097 	virtio_reset(vsc);
1098 	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1099 		vioif_rx_deq(&sc->sc_rxq[i]);
1100 
1101 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1102 	sc->sc_link_active = false;
1103 
1104 	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1105 		txq = &sc->sc_txq[i];
1106 		rxq = &sc->sc_rxq[i];
1107 
1108 		txq->txq_link_active = false;
1109 
1110 		if (disable)
1111 			vioif_rx_drain(rxq);
1112 
1113 		vioif_tx_drain(txq);
1114 	}
1115 }
1116 
1117 static void
1118 vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1119     bool is_transmit)
1120 {
1121 	struct vioif_softc *sc = ifp->if_softc;
1122 	struct virtio_softc *vsc = sc->sc_virtio;
1123 	struct virtqueue *vq = txq->txq_vq;
1124 	struct mbuf *m;
1125 	int queued = 0;
1126 
1127 	KASSERT(mutex_owned(txq->txq_lock));
1128 
1129 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1130 		return;
1131 
1132 	if (!txq->txq_link_active || txq->txq_stopping)
1133 		return;
1134 
1135 	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1136 		return;
1137 
1138 	for (;;) {
1139 		int slot, r;
1140 
1141 		if (is_transmit)
1142 			m = pcq_get(txq->txq_intrq);
1143 		else
1144 			IFQ_DEQUEUE(&ifp->if_snd, m);
1145 
1146 		if (m == NULL)
1147 			break;
1148 
1149 		r = virtio_enqueue_prep(vsc, vq, &slot);
1150 		if (r == EAGAIN) {
1151 			ifp->if_flags |= IFF_OACTIVE;
1152 			m_freem(m);
1153 			break;
1154 		}
1155 		if (r != 0)
1156 			panic("enqueue_prep for a tx buffer");
1157 
1158 		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1159 					 txq->txq_dmamaps[slot],
1160 					 m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1161 		if (r != 0) {
1162 			/* maybe just too fragmented */
1163 			struct mbuf *newm;
1164 
1165 			newm = m_defrag(m, M_NOWAIT);
1166 			if (newm == NULL) {
1167 				aprint_error_dev(sc->sc_dev,
1168 				    "m_defrag() failed\n");
1169 				goto skip;
1170 			}
1171 
1172 			m = newm;
1173 			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1174 					 txq->txq_dmamaps[slot],
1175 					 m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1176 			if (r != 0) {
1177 				aprint_error_dev(sc->sc_dev,
1178 				    "tx dmamap load failed, error code %d\n",
1179 				    r);
1180 skip:
1181 				m_freem(m);
1182 				virtio_enqueue_abort(vsc, vq, slot);
1183 				continue;
1184 			}
1185 		}
1186 
1187 		/* This should actually never fail */
1188 		r = virtio_enqueue_reserve(vsc, vq, slot,
1189 					txq->txq_dmamaps[slot]->dm_nsegs + 1);
1190 		if (r != 0) {
1191 			aprint_error_dev(sc->sc_dev,
1192 			    "virtio_enqueue_reserve failed, error code %d\n",
1193 			    r);
1194 			bus_dmamap_unload(virtio_dmat(vsc),
1195 					  txq->txq_dmamaps[slot]);
1196 			/* slot already freed by virtio_enqueue_reserve */
1197 			m_freem(m);
1198 			continue;
1199 		}
1200 
1201 		txq->txq_mbufs[slot] = m;
1202 
1203 		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
1204 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1205 				0, txq->txq_dmamaps[slot]->dm_mapsize,
1206 				BUS_DMASYNC_PREWRITE);
1207 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1208 				0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1209 				BUS_DMASYNC_PREWRITE);
1210 		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1211 		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1212 		virtio_enqueue_commit(vsc, vq, slot, false);
1213 
1214 		queued++;
1215 		bpf_mtap(ifp, m, BPF_D_OUT);
1216 	}
1217 
1218 	if (queued > 0) {
1219 		virtio_enqueue_commit(vsc, vq, -1, true);
1220 		ifp->if_timer = 5;
1221 	}
1222 }
1223 
1224 static void
1225 vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1226 {
1227 
1228 	/*
1229 	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1230 	 */
1231 	vioif_send_common_locked(ifp, txq, false);
1232 
1233 }
1234 
1235 static void
1236 vioif_start(struct ifnet *ifp)
1237 {
1238 	struct vioif_softc *sc = ifp->if_softc;
1239 	struct vioif_txqueue *txq = &sc->sc_txq[0];
1240 
1241 #ifdef VIOIF_MPSAFE
1242 	KASSERT(if_is_mpsafe(ifp));
1243 #endif
1244 
1245 	mutex_enter(txq->txq_lock);
1246 	if (!txq->txq_stopping)
1247 		vioif_start_locked(ifp, txq);
1248 	mutex_exit(txq->txq_lock);
1249 }
1250 
1251 static inline int
1252 vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1253 {
1254 	struct vioif_softc *sc = ifp->if_softc;
1255 	u_int cpuid = cpu_index(curcpu());
1256 
1257 	return cpuid % sc->sc_act_nvq_pairs;
1258 }
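
/*
 * Note that the mbuf argument is unused: queue selection is purely
 * CPU-based, so all flows transmitted from one CPU share a tx queue
 * (and with sc_act_nvq_pairs == 1 everything maps to txq[0]).
 */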
1259 
1260 static void
1261 vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1262 {
1263 
1264 	vioif_send_common_locked(ifp, txq, true);
1265 }
1266 
1267 static int
1268 vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1269 {
1270 	struct vioif_softc *sc = ifp->if_softc;
1271 	struct vioif_txqueue *txq;
1272 	int qid;
1273 
1274 	qid = vioif_select_txqueue(ifp, m);
1275 	txq = &sc->sc_txq[qid];
1276 
1277 	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1278 		m_freem(m);
1279 		return ENOBUFS;
1280 	}
1281 
1282 	ifp->if_obytes += m->m_pkthdr.len;
1283 	if (m->m_flags & M_MCAST)
1284 		ifp->if_omcasts++;
1285 
1286 	if (mutex_tryenter(txq->txq_lock)) {
1287 		if (!txq->txq_stopping)
1288 			vioif_transmit_locked(ifp, txq);
1289 		mutex_exit(txq->txq_lock);
1290 	}
1291 
1292 	return 0;
1293 }
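
/*
 * Design note: vioif_transmit() uses mutex_tryenter() instead of
 * mutex_enter().  When the lock is contended the packet simply stays
 * in txq_intrq and is drained later, either by a subsequent transmit
 * or by the vioif_deferred_transmit() softint scheduled from the tx
 * completion path.
 */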
1294 
1295 static void
1296 vioif_deferred_transmit(void *arg)
1297 {
1298 	struct vioif_txqueue *txq = arg;
1299 	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1300 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1301 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1302 
1303 	if (mutex_tryenter(txq->txq_lock)) {
1304 		vioif_send_common_locked(ifp, txq, true);
1305 		mutex_exit(txq->txq_lock);
1306 	}
1307 }
1308 
1309 static int
1310 vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1311 {
1312 	int s, r;
1313 
1314 	s = splnet();
1315 
1316 	r = ether_ioctl(ifp, cmd, data);
1317 	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1318 	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1319 		if (ifp->if_flags & IFF_RUNNING)
1320 			r = vioif_rx_filter(ifp->if_softc);
1321 		else
1322 			r = 0;
1323 	}
1324 
1325 	splx(s);
1326 
1327 	return r;
1328 }
1329 
1330 static void
1331 vioif_watchdog(struct ifnet *ifp)
1332 {
1333 	struct vioif_softc *sc = ifp->if_softc;
1334 	int i;
1335 
1336 	if (ifp->if_flags & IFF_RUNNING) {
1337 		for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1338 			vioif_tx_vq_done(sc->sc_txq[i].txq_vq);
1339 	}
1340 }
1341 
1342 
1343 /*
1344  * Receive implementation
1345  */
1346 /* allocate and initialize a mbuf for receive */
1347 static int
1348 vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1349 {
1350 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1351 	struct mbuf *m;
1352 	int r;
1353 
1354 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1355 	if (m == NULL)
1356 		return ENOBUFS;
1357 	MCLGET(m, M_DONTWAIT);
1358 	if ((m->m_flags & M_EXT) == 0) {
1359 		m_freem(m);
1360 		return ENOBUFS;
1361 	}
1362 	rxq->rxq_mbufs[i] = m;
1363 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1364 	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1365 				 rxq->rxq_dmamaps[i],
1366 				 m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1367 	if (r) {
1368 		m_freem(m);
1369 		rxq->rxq_mbufs[i] = NULL;
1370 		return r;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
1376 /* free a mbuf for receive */
1377 static void
1378 vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1379 {
1380 	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1381 
1382 	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1383 	m_freem(rxq->rxq_mbufs[i]);
1384 	rxq->rxq_mbufs[i] = NULL;
1385 }
1386 
1387 /* add mbufs for all the empty receive slots */
1388 static void
1389 vioif_populate_rx_mbufs(struct vioif_rxqueue *rxq)
1390 {
1391 
1392 	mutex_enter(rxq->rxq_lock);
1393 	vioif_populate_rx_mbufs_locked(rxq);
1394 	mutex_exit(rxq->rxq_lock);
1395 }
1396 
1397 static void
1398 vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *rxq)
1399 {
1400 	struct virtqueue *vq = rxq->rxq_vq;
1401 	struct virtio_softc *vsc = vq->vq_owner;
1402 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1403 	int i, r, ndone = 0;
1404 
1405 	KASSERT(mutex_owned(rxq->rxq_lock));
1406 
1407 	if (rxq->rxq_stopping)
1408 		return;
1409 
1410 	for (i = 0; i < vq->vq_num; i++) {
1411 		int slot;
1412 		r = virtio_enqueue_prep(vsc, vq, &slot);
1413 		if (r == EAGAIN)
1414 			break;
1415 		if (r != 0)
1416 			panic("enqueue_prep for rx buffers");
1417 		if (rxq->rxq_mbufs[slot] == NULL) {
1418 			r = vioif_add_rx_mbuf(rxq, slot);
1419 			if (r != 0) {
1420 				printf("%s: rx mbuf allocation failed, "
1421 				       "error code %d\n",
1422 				       device_xname(sc->sc_dev), r);
1423 				break;
1424 			}
1425 		}
1426 		r = virtio_enqueue_reserve(vsc, vq, slot,
1427 					rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1428 		if (r != 0) {
1429 			vioif_free_rx_mbuf(rxq, slot);
1430 			break;
1431 		}
1432 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1433 			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
1434 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1435 			0, MCLBYTES, BUS_DMASYNC_PREREAD);
1436 		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot], false);
1437 		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1438 		virtio_enqueue_commit(vsc, vq, slot, false);
1439 		ndone++;
1440 	}
1441 	if (ndone > 0)
1442 		virtio_enqueue_commit(vsc, vq, -1, true);
1443 }
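
/*
 * Each rx slot enqueued above is a two-segment, device-writable chain:
 * the virtio_net_hdr in rxq_hdr_dmamaps[slot] followed by an MCLBYTES
 * mbuf cluster in rxq_dmamaps[slot].
 */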
1444 
1445 /* dequeue received packets */
1446 static int
1447 vioif_rx_deq(struct vioif_rxqueue *rxq)
1448 {
1449 	int r;
1450 
1451 	KASSERT(rxq->rxq_stopping);
1452 
1453 	mutex_enter(rxq->rxq_lock);
1454 	r = vioif_rx_deq_locked(rxq);
1455 	mutex_exit(rxq->rxq_lock);
1456 
1457 	return r;
1458 }
1459 
1460 /* dequeue received packets */
1461 static int
1462 vioif_rx_deq_locked(struct vioif_rxqueue *rxq)
1463 {
1464 	struct virtqueue *vq = rxq->rxq_vq;
1465 	struct virtio_softc *vsc = vq->vq_owner;
1466 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1467 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1468 	struct mbuf *m;
1469 	int r = 0;
1470 	int slot, len;
1471 
1472 	KASSERT(mutex_owned(rxq->rxq_lock));
1473 
1474 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1475 		len -= sizeof(struct virtio_net_hdr);
1476 		r = 1;
1477 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1478 				0, sizeof(struct virtio_net_hdr),
1479 				BUS_DMASYNC_POSTREAD);
1480 		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1481 				0, MCLBYTES,
1482 				BUS_DMASYNC_POSTREAD);
1483 		m = rxq->rxq_mbufs[slot];
1484 		KASSERT(m != NULL);
1485 		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1486 		rxq->rxq_mbufs[slot] = NULL;
1487 		virtio_dequeue_commit(vsc, vq, slot);
1488 		m_set_rcvif(m, ifp);
1489 		m->m_len = m->m_pkthdr.len = len;
1490 
1491 		mutex_exit(rxq->rxq_lock);
1492 		if_percpuq_enqueue(ifp->if_percpuq, m);
1493 		mutex_enter(rxq->rxq_lock);
1494 
1495 		if (rxq->rxq_stopping)
1496 			break;
1497 	}
1498 
1499 	return r;
1500 }
1501 
1502 /* rx interrupt; call _dequeue above and schedule a softint */
1503 static int
1504 vioif_rx_vq_done(struct virtqueue *vq)
1505 {
1506 	struct vioif_rxqueue *rxq = vq->vq_done_ctx;
1507 	int r = 0;
1508 
1509 #ifdef VIOIF_SOFTINT_INTR
1510 	KASSERT(!cpu_intr_p());
1511 #endif
1512 
1513 	mutex_enter(rxq->rxq_lock);
1514 
1515 	if (rxq->rxq_stopping)
1516 		goto out;
1517 
1518 	r = vioif_rx_deq_locked(rxq);
1519 	if (r)
1520 #ifdef VIOIF_SOFTINT_INTR
1521 		vioif_populate_rx_mbufs_locked(rxq);
1522 #else
1523 		softint_schedule(rxq->rxq_softint);
1524 #endif
1525 
1526 out:
1527 	mutex_exit(rxq->rxq_lock);
1528 	return r;
1529 }
1530 
1531 /* softint: enqueue receive requests for new incoming packets */
1532 static void
1533 vioif_rx_softint(void *arg)
1534 {
1535 	struct vioif_rxqueue *rxq = arg;
1536 
1537 	vioif_populate_rx_mbufs(rxq);
1538 }
1539 
1540 /* free all the mbufs; called from if_stop(disable) */
1541 static void
1542 vioif_rx_drain(struct vioif_rxqueue *rxq)
1543 {
1544 	struct virtqueue *vq = rxq->rxq_vq;
1545 	int i;
1546 
1547 	for (i = 0; i < vq->vq_num; i++) {
1548 		if (rxq->rxq_mbufs[i] == NULL)
1549 			continue;
1550 		vioif_free_rx_mbuf(rxq, i);
1551 	}
1552 }
1553 
1554 
1555 /*
1556  * Transmission implementation
1557  */
1558 /* actual transmission is done in if_start */
1559 /* tx interrupt; dequeue and free mbufs */
1560 /*
1561  * The tx interrupt is actually disabled; this is called when the
1562  * tx vq fills up and from the watchdog timer.
1563  */
1564 static int
1565 vioif_tx_vq_done(struct virtqueue *vq)
1566 {
1567 	struct virtio_softc *vsc = vq->vq_owner;
1568 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1569 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1570 	struct vioif_txqueue *txq = vq->vq_done_ctx;
1571 	int r = 0;
1572 
1573 	mutex_enter(txq->txq_lock);
1574 
1575 	if (txq->txq_stopping)
1576 		goto out;
1577 
1578 	r = vioif_tx_vq_done_locked(vq);
1579 
1580 out:
1581 	mutex_exit(txq->txq_lock);
1582 	if (r) {
1583 		if_schedule_deferred_start(ifp);
1584 
1585 		KASSERT(txq->txq_deferred_transmit != NULL);
1586 		softint_schedule(txq->txq_deferred_transmit);
1587 	}
1588 	return r;
1589 }
1590 
1591 static int
1592 vioif_tx_vq_done_locked(struct virtqueue *vq)
1593 {
1594 	struct virtio_softc *vsc = vq->vq_owner;
1595 	struct vioif_softc *sc = device_private(virtio_child(vsc));
1596 	struct vioif_txqueue *txq = vq->vq_done_ctx;
1597 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1598 	struct mbuf *m;
1599 	int r = 0;
1600 	int slot, len;
1601 
1602 	KASSERT(mutex_owned(txq->txq_lock));
1603 
1604 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1605 		r++;
1606 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1607 				0, sizeof(struct virtio_net_hdr),
1608 				BUS_DMASYNC_POSTWRITE);
1609 		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1610 				0, txq->txq_dmamaps[slot]->dm_mapsize,
1611 				BUS_DMASYNC_POSTWRITE);
1612 		m = txq->txq_mbufs[slot];
1613 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1614 		txq->txq_mbufs[slot] = NULL;
1615 		virtio_dequeue_commit(vsc, vq, slot);
1616 		ifp->if_opackets++;
1617 		m_freem(m);
1618 	}
1619 
1620 	if (r)
1621 		ifp->if_flags &= ~IFF_OACTIVE;
1622 	return r;
1623 }
1624 
1625 /* free all the mbufs already put on vq; called from if_stop(disable) */
1626 static void
1627 vioif_tx_drain(struct vioif_txqueue *txq)
1628 {
1629 	struct virtqueue *vq = txq->txq_vq;
1630 	struct virtio_softc *vsc = vq->vq_owner;
1631 	int i;
1632 
1633 	KASSERT(txq->txq_stopping);
1634 
1635 	for (i = 0; i < vq->vq_num; i++) {
1636 		if (txq->txq_mbufs[i] == NULL)
1637 			continue;
1638 		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1639 		m_freem(txq->txq_mbufs[i]);
1640 		txq->txq_mbufs[i] = NULL;
1641 	}
1642 }
1643 
1644 /*
1645  * Control vq
1646  */
1647 /* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1648 static void
1649 vioif_ctrl_acquire(struct vioif_softc *sc)
1650 {
1651 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1652 
1653 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1654 	while (ctrlq->ctrlq_inuse != FREE)
1655 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1656 	ctrlq->ctrlq_inuse = INUSE;
1657 	ctrlq->ctrlq_owner = curlwp;
1658 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1659 }
1660 
1661 static void
1662 vioif_ctrl_release(struct vioif_softc *sc)
1663 {
1664 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1665 
1666 	KASSERT(ctrlq->ctrlq_inuse != FREE);
1667 	KASSERT(ctrlq->ctrlq_owner == curlwp);
1668 
1669 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1670 	ctrlq->ctrlq_inuse = FREE;
1671 	ctrlq->ctrlq_owner = NULL;
1672 	cv_signal(&ctrlq->ctrlq_wait);
1673 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1674 }
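
/*
 * Typical control-command sequence (a sketch; vioif_ctrl_rx() below is
 * a concrete instance):
 *
 *	vioif_ctrl_acquire(sc);
 *	... fill the ctrlq_* parameter buffer(s), build specs[] ...
 *	r = vioif_ctrl_send_command(sc, class, cmd, specs, nspecs);
 *	vioif_ctrl_release(sc);
 */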
1675 
1676 static int
1677 vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1678     struct vioif_ctrl_cmdspec *specs, int nspecs)
1679 {
1680 	struct virtio_softc *vsc = sc->sc_virtio;
1681 	int i, r = 0, loaded;
1682 
1683 	loaded = 0;
1684 	for (i = 0; i < nspecs; i++) {
1685 		r = bus_dmamap_load(virtio_dmat(vsc),
1686 		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1687 		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1688 		if (r) {
1689 			printf("%s: control command dmamap load failed, "
1690 			       "error code %d\n", device_xname(sc->sc_dev), r);
1691 			goto err;
1692 		}
1693 		loaded++;
1694 
1695 	}
1696 
1697 	return r;
1698 
1699 err:
1700 	for (i = 0; i < loaded; i++) {
1701 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1702 	}
1703 
1704 	return r;
1705 }
1706 
1707 static void
1708 vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
1709     struct vioif_ctrl_cmdspec *specs, int nspecs)
1710 {
1711 	struct virtio_softc *vsc = sc->sc_virtio;
1712 	int i;
1713 
1714 	for (i = 0; i < nspecs; i++) {
1715 		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1716 	}
1717 }
1718 
1719 static int
1720 vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
1721     struct vioif_ctrl_cmdspec *specs, int nspecs)
1722 {
1723 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1724 	struct virtqueue *vq = ctrlq->ctrlq_vq;
1725 	struct virtio_softc *vsc = sc->sc_virtio;
1726 	int i, r, slot;
1727 
1728 	ctrlq->ctrlq_cmd->class = class;
1729 	ctrlq->ctrlq_cmd->command = cmd;
1730 
1731 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
1732 			0, sizeof(struct virtio_net_ctrl_cmd),
1733 			BUS_DMASYNC_PREWRITE);
1734 	for (i = 0; i < nspecs; i++) {
1735 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
1736 				0, specs[i].bufsize,
1737 				BUS_DMASYNC_PREWRITE);
1738 	}
1739 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
1740 			0, sizeof(struct virtio_net_ctrl_status),
1741 			BUS_DMASYNC_PREREAD);
1742 
1743 	r = virtio_enqueue_prep(vsc, vq, &slot);
1744 	if (r != 0)
1745 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1746 	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
1747 	if (r != 0)
1748 		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1749 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
1750 	for (i = 0; i < nspecs; i++) {
1751 		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
1752 	}
1753 	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
1754 	virtio_enqueue_commit(vsc, vq, slot, true);
1755 
1756 	/* wait for done */
1757 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1758 	while (ctrlq->ctrlq_inuse != DONE)
1759 		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1760 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1761 	/* already dequeued */
1762 
1763 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
1764 			sizeof(struct virtio_net_ctrl_cmd),
1765 			BUS_DMASYNC_POSTWRITE);
1766 	for (i = 0; i < nspecs; i++) {
1767 		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
1768 				specs[i].bufsize,
1769 				BUS_DMASYNC_POSTWRITE);
1770 	}
1771 	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
1772 			sizeof(struct virtio_net_ctrl_status),
1773 			BUS_DMASYNC_POSTREAD);
1774 
1775 	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
1776 		r = 0;
1777 	else {
1778 		printf("%s: control command failed\n",
1779 		       device_xname(sc->sc_dev));
1780 		r = EIO;
1781 	}
1782 
1783 	return r;
1784 }
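
/*
 * The chain enqueued above is: the command header (device-readable),
 * each spec buffer (device-readable), then the one-byte status
 * (device-writable), that is, nspecs + 2 descriptors in total,
 * matching the virtio_enqueue_reserve() call.
 */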
1785 
1786 static int
1787 vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
1788 {
1789 	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
1790 	struct vioif_ctrl_cmdspec specs[1];
1791 	int r;
1792 
1793 	if (!sc->sc_has_ctrl)
1794 		return ENOTSUP;
1795 
1796 	vioif_ctrl_acquire(sc);
1797 
1798 	rx->onoff = onoff;
1799 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
1800 	specs[0].buf = rx;
1801 	specs[0].bufsize = sizeof(*rx);
1802 
1803 	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
1804 	    specs, __arraycount(specs));
1805 
1806 	vioif_ctrl_release(sc);
1807 	return r;
1808 }
1809 
1810 static int
1811 vioif_set_promisc(struct vioif_softc *sc, bool onoff)
1812 {
1813 	int r;
1814 
1815 	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
1816 
1817 	return r;
1818 }
1819 
1820 static int
1821 vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
1822 {
1823 	int r;
1824 
1825 	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
1826 
1827 	return r;
1828 }
1829 
1830 /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1831 static int
1832 vioif_set_rx_filter(struct vioif_softc *sc)
1833 {
1834 	/* filter already set in ctrlq->ctrlq_mac_tbl */
1835 	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
1836 	struct vioif_ctrl_cmdspec specs[2];
1837 	int nspecs = __arraycount(specs);
1838 	int r;
1839 
1840 	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
1841 	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
1842 
1843 	if (!sc->sc_has_ctrl)
1844 		return ENOTSUP;
1845 
1846 	vioif_ctrl_acquire(sc);
1847 
1848 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
1849 	specs[0].buf = mac_tbl_uc;
1850 	specs[0].bufsize = sizeof(*mac_tbl_uc)
1851 	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);
1852 
1853 	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
1854 	specs[1].buf = mac_tbl_mc;
1855 	specs[1].bufsize = sizeof(*mac_tbl_mc)
1856 	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);
1857 
1858 	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
1859 	if (r != 0)
1860 		goto out;
1861 
1862 	r = vioif_ctrl_send_command(sc,
1863 	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
1864 	    specs, nspecs);
1865 
1866 	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
1867 
1868 out:
1869 	vioif_ctrl_release(sc);
1870 
1871 	return r;
1872 }
1873 
1874 static int
1875 vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
1876 {
1877 	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
1878 	struct vioif_ctrl_cmdspec specs[1];
1879 	int r;
1880 
1881 	if (!sc->sc_has_ctrl)
1882 		return ENOTSUP;
1883 
1884 	if (nvq_pairs <= 1)
1885 		return EINVAL;
1886 
1887 	vioif_ctrl_acquire(sc);
1888 
1889 	mq->virtqueue_pairs = nvq_pairs;
1890 	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
1891 	specs[0].buf = mq;
1892 	specs[0].bufsize = sizeof(*mq);
1893 
1894 	r = vioif_ctrl_send_command(sc,
1895 	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
1896 	    specs, __arraycount(specs));
1897 
1898 	vioif_ctrl_release(sc);
1899 
1900 	return r;
1901 }
1902 
1903 /* ctrl vq interrupt; wake up the command issuer */
1904 static int
1905 vioif_ctrl_vq_done(struct virtqueue *vq)
1906 {
1907 	struct virtio_softc *vsc = vq->vq_owner;
1908 	struct vioif_ctrlqueue *ctrlq = vq->vq_done_ctx;
1909 	int r, slot;
1910 
1911 	r = virtio_dequeue(vsc, vq, &slot, NULL);
1912 	if (r == ENOENT)
1913 		return 0;
1914 	virtio_dequeue_commit(vsc, vq, slot);
1915 
1916 	mutex_enter(&ctrlq->ctrlq_wait_lock);
1917 	ctrlq->ctrlq_inuse = DONE;
1918 	cv_signal(&ctrlq->ctrlq_wait);
1919 	mutex_exit(&ctrlq->ctrlq_wait_lock);
1920 
1921 	return 1;
1922 }
1923 
1924 /*
1925  * If IFF_PROMISC is requested, enable promiscuous mode.
1926  * If the multicast filter is small enough (<= MAXENTRIES), program
1927  * the rx filter; if it is too large, use ALLMULTI instead.
1928  */
1929 /*
1930  * If setting the rx filter fails, fall back to ALLMULTI.
1931  * If ALLMULTI fails, fall back to PROMISC.
1932  */
1933 static int
1934 vioif_rx_filter(struct vioif_softc *sc)
1935 {
1936 	struct ethercom *ec = &sc->sc_ethercom;
1937 	struct ifnet *ifp = &ec->ec_if;
1938 	struct ether_multi *enm;
1939 	struct ether_multistep step;
1940 	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1941 	int nentries;
1942 	int promisc = 0, allmulti = 0, rxfilter = 0;
1943 	int r;
1944 
1945 	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
1946 		ifp->if_flags |= IFF_PROMISC;
1947 		return 0;
1948 	}
1949 
1950 	if (ifp->if_flags & IFF_PROMISC) {
1951 		promisc = 1;
1952 		goto set;
1953 	}
1954 
1955 	nentries = -1;
1956 	ETHER_LOCK(ec);
1957 	ETHER_FIRST_MULTI(step, ec, enm);
1958 	while (nentries++, enm != NULL) {
1959 		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
1960 			allmulti = 1;
1961 			goto set_unlock;
1962 		}
1963 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
1964 			   ETHER_ADDR_LEN)) {
1965 			allmulti = 1;
1966 			goto set_unlock;
1967 		}
1968 		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
1969 		       enm->enm_addrlo, ETHER_ADDR_LEN);
1970 		ETHER_NEXT_MULTI(step, enm);
1971 	}
1972 	rxfilter = 1;
1973 
1974 set_unlock:
1975 	ETHER_UNLOCK(ec);
1976 
1977 set:
1978 	if (rxfilter) {
1979 		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1980 		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
1981 		r = vioif_set_rx_filter(sc);
1982 		if (r != 0) {
1983 			rxfilter = 0;
1984 			allmulti = 1; /* fallback */
1985 		}
1986 	} else {
1987 		/* remove rx filter */
1988 		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1989 		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
1990 		r = vioif_set_rx_filter(sc);
1991 		/* what to do on failure? */
1992 	}
1993 	if (allmulti) {
1994 		r = vioif_set_allmulti(sc, true);
1995 		if (r != 0) {
1996 			allmulti = 0;
1997 			promisc = 1; /* fallback */
1998 		}
1999 	} else {
2000 		r = vioif_set_allmulti(sc, false);
2001 		/* what to do on failure? */
2002 	}
2003 	if (promisc) {
2004 		r = vioif_set_promisc(sc, true);
2005 	} else {
2006 		r = vioif_set_promisc(sc, false);
2007 	}
2008 
2009 	return r;
2010 }
2011 
2012 static bool
2013 vioif_is_link_up(struct vioif_softc *sc)
2014 {
2015 	struct virtio_softc *vsc = sc->sc_virtio;
2016 	uint16_t status;
2017 
2018 	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2019 		status = virtio_read_device_config_2(vsc,
2020 		    VIRTIO_NET_CONFIG_STATUS);
2021 	else
2022 		status = VIRTIO_NET_S_LINK_UP;
2023 
2024 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2025 }
2026 
2027 /* change link status */
2028 static void
2029 vioif_update_link_status(struct vioif_softc *sc)
2030 {
2031 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2032 	struct vioif_txqueue *txq;
2033 	bool active, changed;
2034 	int link, i;
2035 
2036 	active = vioif_is_link_up(sc);
2037 	changed = false;
2038 
2039 	if (active) {
2040 		if (!sc->sc_link_active)
2041 			changed = true;
2042 
2043 		link = LINK_STATE_UP;
2044 		sc->sc_link_active = true;
2045 	} else {
2046 		if (sc->sc_link_active)
2047 			changed = true;
2048 
2049 		link = LINK_STATE_DOWN;
2050 		sc->sc_link_active = false;
2051 	}
2052 
2053 	if (changed) {
2054 		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2055 			txq = &sc->sc_txq[i];
2056 
2057 			mutex_enter(txq->txq_lock);
2058 			txq->txq_link_active = sc->sc_link_active;
2059 			mutex_exit(txq->txq_lock);
2060 		}
2061 
2062 		if_link_state_change(ifp, link);
2063 	}
2064 }
2065 
2066 static int
2067 vioif_config_change(struct virtio_softc *vsc)
2068 {
2069 	struct vioif_softc *sc = device_private(virtio_child(vsc));
2070 
2071 #ifdef VIOIF_SOFTINT_INTR
2072 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2073 #endif
2074 
2075 #ifdef VIOIF_SOFTINT_INTR
2076 	KASSERT(!cpu_intr_p());
2077 	vioif_update_link_status(sc);
2078 	vioif_start(ifp);
2079 #else
2080 	softint_schedule(sc->sc_ctl_softint);
2081 #endif
2082 
2083 	return 0;
2084 }
2085 
2086 static void
2087 vioif_ctl_softint(void *arg)
2088 {
2089 	struct vioif_softc *sc = arg;
2090 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2091 
2092 	vioif_update_link_status(sc);
2093 	vioif_start(ifp);
2094 }
2095 
2096 MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2097 
2098 #ifdef _MODULE
2099 #include "ioconf.c"
2100 #endif
2101 
2102 static int
2103 if_vioif_modcmd(modcmd_t cmd, void *opaque)
2104 {
2105 	int error = 0;
2106 
2107 #ifdef _MODULE
2108 	switch (cmd) {
2109 	case MODULE_CMD_INIT:
2110 		error = config_init_component(cfdriver_ioconf_if_vioif,
2111 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2112 		break;
2113 	case MODULE_CMD_FINI:
2114 		error = config_fini_component(cfdriver_ioconf_if_vioif,
2115 		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2116 		break;
2117 	default:
2118 		error = ENOTTY;
2119 		break;
2120 	}
2121 #endif
2122 
2123 	return error;
2124 }
2125