xref: /openbsd-src/sys/dev/pv/if_vio.c (revision d5abdd01d7a5f24fb6f9b0aab446ef59a9e9067a)
1 /*	$OpenBSD: if_vio.c,v 1.23 2023/05/29 08:13:35 sf Exp $	*/
2 
3 /*
4  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "bpfilter.h"
30 #include "vlan.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/device.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <sys/timeout.h>
40 
41 #include <dev/pv/virtioreg.h>
42 #include <dev/pv/virtiovar.h>
43 
44 #include <net/if.h>
45 #include <net/if_media.h>
46 
47 #include <netinet/in.h>
48 #include <netinet/if_ether.h>
49 #include <netinet/ip.h>
50 #include <netinet/tcp.h>
51 #include <netinet/udp.h>
52 
53 #if NBPFILTER > 0
54 #include <net/bpf.h>
55 #endif
56 
/*
 * Debug trace helper: forwards its arguments to printf() when
 * VIRTIO_DEBUG is non-zero and expands to nothing otherwise.
 */
#if !VIRTIO_DEBUG
#define DPRINTF(args...)
#else
#define DPRINTF(args...)	printf(args)
#endif
62 
63 /*
64  * if_vioreg.h:
65  */
/* Offsets of the fields in the device-specific configuration space */
#define VIRTIO_NET_CONFIG_MAC		0	/* 6 bytes, accessed 8 bits at a time */
#define VIRTIO_NET_CONFIG_STATUS	6	/* 16 bits */

/* Feature bits, one bit per feature in a 64-bit mask */
#define VIRTIO_NET_F_CSUM			(1ULL << 0)
#define VIRTIO_NET_F_GUEST_CSUM			(1ULL << 1)
#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS	(1ULL << 2)
#define VIRTIO_NET_F_MTU			(1ULL << 3)
#define VIRTIO_NET_F_MAC			(1ULL << 5)
#define VIRTIO_NET_F_GSO			(1ULL << 6)
#define VIRTIO_NET_F_GUEST_TSO4			(1ULL << 7)
#define VIRTIO_NET_F_GUEST_TSO6			(1ULL << 8)
#define VIRTIO_NET_F_GUEST_ECN			(1ULL << 9)
#define VIRTIO_NET_F_GUEST_UFO			(1ULL << 10)
#define VIRTIO_NET_F_HOST_TSO4			(1ULL << 11)
#define VIRTIO_NET_F_HOST_TSO6			(1ULL << 12)
#define VIRTIO_NET_F_HOST_ECN			(1ULL << 13)
#define VIRTIO_NET_F_HOST_UFO			(1ULL << 14)
#define VIRTIO_NET_F_MRG_RXBUF			(1ULL << 15)
#define VIRTIO_NET_F_STATUS			(1ULL << 16)
#define VIRTIO_NET_F_CTRL_VQ			(1ULL << 17)
#define VIRTIO_NET_F_CTRL_RX			(1ULL << 18)
#define VIRTIO_NET_F_CTRL_VLAN			(1ULL << 19)
#define VIRTIO_NET_F_CTRL_RX_EXTRA		(1ULL << 20)
#define VIRTIO_NET_F_GUEST_ANNOUNCE		(1ULL << 21)
#define VIRTIO_NET_F_MQ				(1ULL << 22)
#define VIRTIO_NET_F_CTRL_MAC_ADDR		(1ULL << 23)

/*
 * Config(8) flags. The lowest byte is reserved for generic virtio stuff.
 */

/* Workaround for vlan related bug in qemu < version 2.0 */
#define CONFFLAG_QEMU_VLAN_BUG		(1 << 8)
101 
102 static const struct virtio_feature_name virtio_net_feature_names[] = {
103 #if VIRTIO_DEBUG
104 	{ VIRTIO_NET_F_CSUM,			"CSum" },
105 	{ VIRTIO_NET_F_GUEST_CSUM,		"GuestCSum" },
106 	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlGuestOffl" },
107 	{ VIRTIO_NET_F_MTU,			"MTU", },
108 	{ VIRTIO_NET_F_MAC,			"MAC" },
109 	{ VIRTIO_NET_F_GSO,			"GSO" },
110 	{ VIRTIO_NET_F_GUEST_TSO4,		"GuestTSO4" },
111 	{ VIRTIO_NET_F_GUEST_TSO6,		"GuestTSO6" },
112 	{ VIRTIO_NET_F_GUEST_ECN,		"GuestECN" },
113 	{ VIRTIO_NET_F_GUEST_UFO,		"GuestUFO" },
114 	{ VIRTIO_NET_F_HOST_TSO4,		"HostTSO4" },
115 	{ VIRTIO_NET_F_HOST_TSO6,		"HostTSO6" },
116 	{ VIRTIO_NET_F_HOST_ECN,		"HostECN" },
117 	{ VIRTIO_NET_F_HOST_UFO,		"HostUFO" },
118 	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRXBuf" },
119 	{ VIRTIO_NET_F_STATUS,			"Status" },
120 	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVQ" },
121 	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRX" },
122 	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLAN" },
123 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRXExtra" },
124 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce" },
125 	{ VIRTIO_NET_F_MQ,			"MQ" },
126 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMAC" },
127 #endif
128 	{ 0,				NULL }
129 };
130 
/* Bits of the 16-bit status field in the device configuration */
#define VIRTIO_NET_S_LINK_UP	1

/*
 * Per-packet header that precedes every frame exchanged with the device.
 * The driver uses either the whole structure or only the part in front
 * of num_buffers (see sc_hdr_size).
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;

	/* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */
	uint16_t	num_buffers;
} __packed;

/* values for virtio_net_hdr.flags */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1

/* values for virtio_net_hdr.gso_type */
#define VIRTIO_NET_HDR_GSO_NONE		0
#define VIRTIO_NET_HDR_GSO_TCPV4	1
#define VIRTIO_NET_HDR_GSO_UDP		3
#define VIRTIO_NET_HDR_GSO_TCPV6	4
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* |'ed into gso_type */

#define VIRTIO_NET_MAX_GSO_LEN		(65536 + ETHER_HDR_LEN)
155 
156 /* Control virtqueue */
157 struct virtio_net_ctrl_cmd {
158 	uint8_t	class;
159 	uint8_t	command;
160 } __packed;
161 #define VIRTIO_NET_CTRL_RX		0
162 # define VIRTIO_NET_CTRL_RX_PROMISC	0
163 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
164 
165 #define VIRTIO_NET_CTRL_MAC		1
166 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
167 
168 #define VIRTIO_NET_CTRL_VLAN		2
169 # define VIRTIO_NET_CTRL_VLAN_ADD	0
170 # define VIRTIO_NET_CTRL_VLAN_DEL	1
171 
172 struct virtio_net_ctrl_status {
173 	uint8_t	ack;
174 } __packed;
175 #define VIRTIO_NET_OK			0
176 #define VIRTIO_NET_ERR			1
177 
178 struct virtio_net_ctrl_rx {
179 	uint8_t	onoff;
180 } __packed;
181 
182 struct virtio_net_ctrl_mac_tbl {
183 	uint32_t nentries;
184 	uint8_t macs[][ETHER_ADDR_LEN];
185 } __packed;
186 
187 struct virtio_net_ctrl_vlan {
188 	uint16_t id;
189 } __packed;
190 
191 /*
192  * if_viovar.h:
193  */
/*
 * Values stored in vio_softc.sc_ctrl_inuse.  vio_stop() moves any state
 * other than FREE to RESET when the device is reset.
 */
enum vio_ctrl_state {
	FREE,
	INUSE,
	DONE,
	RESET
};
197 
198 struct vio_softc {
199 	struct device		sc_dev;
200 
201 	struct virtio_softc	*sc_virtio;
202 #define	VQRX	0
203 #define	VQTX	1
204 #define	VQCTL	2
205 	struct virtqueue	sc_vq[3];
206 
207 	struct arpcom		sc_ac;
208 	struct ifmedia		sc_media;
209 
210 	short			sc_ifflags;
211 
212 	/* bus_dmamem */
213 	bus_dma_segment_t	sc_dma_seg;
214 	bus_dmamap_t		sc_dma_map;
215 	size_t			sc_dma_size;
216 	caddr_t			sc_dma_kva;
217 
218 	int			sc_hdr_size;
219 	struct virtio_net_hdr	*sc_tx_hdrs;
220 	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
221 	struct virtio_net_ctrl_status *sc_ctrl_status;
222 	struct virtio_net_ctrl_rx *sc_ctrl_rx;
223 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
224 #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
225 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
226 
227 	/* kmem */
228 	bus_dmamap_t		*sc_arrays;
229 #define sc_rx_dmamaps sc_arrays
230 	bus_dmamap_t		*sc_tx_dmamaps;
231 	struct mbuf		**sc_rx_mbufs;
232 	struct mbuf		**sc_tx_mbufs;
233 	struct if_rxring	sc_rx_ring;
234 
235 	enum vio_ctrl_state	sc_ctrl_inuse;
236 
237 	struct timeout		sc_txtick, sc_rxtick;
238 };
239 
/* byte offset of pointer p inside the softc's DMA region */
#define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)

/* bus_dmamap_sync() on the part of the DMA region that p points to */
#define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)		\
	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))

/* virtio_enqueue_p() on the part of the DMA region that p points to */
#define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write)	\
	virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))

/* true when the full header, including num_buffers, is in use */
#define VIO_HAVE_MRG_RXBUF(sc)					\
	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))

#define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
#define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
#define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */

/* space for both MAC filter tables (unicast + multicast) with entries */
#define VIO_CTRL_MAC_INFO_SIZE					\
	(2 * sizeof(struct virtio_net_ctrl_mac_tbl) +		\
	 (VIRTIO_NET_CTRL_MAC_MC_ENTRIES +			\
	  VIRTIO_NET_CTRL_MAC_UC_ENTRIES) * ETHER_ADDR_LEN)
258 
259 /* cfattach interface functions */
260 int	vio_match(struct device *, void *, void *);
261 void	vio_attach(struct device *, struct device *, void *);
262 
263 /* ifnet interface functions */
264 int	vio_init(struct ifnet *);
265 void	vio_stop(struct ifnet *, int);
266 void	vio_start(struct ifnet *);
267 int	vio_ioctl(struct ifnet *, u_long, caddr_t);
268 void	vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
269 void	vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);
270 
271 /* rx */
272 int	vio_add_rx_mbuf(struct vio_softc *, int);
273 void	vio_free_rx_mbuf(struct vio_softc *, int);
274 void	vio_populate_rx_mbufs(struct vio_softc *);
275 int	vio_rxeof(struct vio_softc *);
276 int	vio_rx_intr(struct virtqueue *);
277 void	vio_rx_drain(struct vio_softc *);
278 void	vio_rxtick(void *);
279 
280 /* tx */
281 int	vio_tx_intr(struct virtqueue *);
282 int	vio_txeof(struct virtqueue *);
283 void	vio_tx_drain(struct vio_softc *);
284 int	vio_encap(struct vio_softc *, int, struct mbuf *);
285 void	vio_txtick(void *);
286 
287 /* other control */
288 void	vio_link_state(struct ifnet *);
289 int	vio_config_change(struct virtio_softc *);
290 int	vio_ctrl_rx(struct vio_softc *, int, int);
291 int	vio_set_rx_filter(struct vio_softc *);
292 void	vio_iff(struct vio_softc *);
293 int	vio_media_change(struct ifnet *);
294 void	vio_media_status(struct ifnet *, struct ifmediareq *);
295 int	vio_ctrleof(struct virtqueue *);
296 int	vio_wait_ctrl(struct vio_softc *sc);
297 int	vio_wait_ctrl_done(struct vio_softc *sc);
298 void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
299 int	vio_alloc_mem(struct vio_softc *);
300 int	vio_alloc_dmamem(struct vio_softc *);
301 void	vio_free_dmamem(struct vio_softc *);
302 
303 #if VIRTIO_DEBUG
304 void	vio_dump(struct vio_softc *);
305 #endif
306 
307 int
308 vio_match(struct device *parent, void *match, void *aux)
309 {
310 	struct virtio_softc *va = aux;
311 
312 	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
313 		return 1;
314 
315 	return 0;
316 }
317 
318 const struct cfattach vio_ca = {
319 	sizeof(struct vio_softc), vio_match, vio_attach, NULL
320 };
321 
322 struct cfdriver vio_cd = {
323 	NULL, "vio", DV_IFNET
324 };
325 
326 int
327 vio_alloc_dmamem(struct vio_softc *sc)
328 {
329 	struct virtio_softc *vsc = sc->sc_virtio;
330 	int nsegs;
331 
332 	if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
333 	    sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
334 	    &sc->sc_dma_map) != 0)
335 		goto err;
336 	if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
337 	    &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
338 		goto destroy;
339 	if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
340 	    sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
341 		goto free;
342 	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
343 	    sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
344 		goto unmap;
345 	return (0);
346 
347 unmap:
348 	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
349 free:
350 	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
351 destroy:
352 	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
353 err:
354 	return (1);
355 }
356 
357 void
358 vio_free_dmamem(struct vio_softc *sc)
359 {
360 	struct virtio_softc *vsc = sc->sc_virtio;
361 	bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
362 	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
363 	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
364 	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
365 }
366 
367 /* allocate memory */
368 /*
369  * dma memory is used for:
370  *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
371  *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
372  *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
373  *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
374  *			 (WRITE)
375  *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
376  *			 class command (WRITE)
377  *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
378  *			 class command (WRITE)
379  * sc_ctrl_* structures are allocated only one each; they are protected by
380  * sc_ctrl_inuse, which must only be accessed at splnet
381  *
382  * metadata headers for received frames are stored at the start of the
383  * rx mbufs.
384  */
385 /*
386  * dynamically allocated memory is used for:
387  *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
388  *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
389  *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
390  *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
391  */
392 int
393 vio_alloc_mem(struct vio_softc *sc)
394 {
395 	struct virtio_softc *vsc = sc->sc_virtio;
396 	struct ifnet *ifp = &sc->sc_ac.ac_if;
397 	int allocsize, r, i, txsize;
398 	unsigned int offset = 0;
399 	int rxqsize, txqsize;
400 	caddr_t kva;
401 
402 	rxqsize = vsc->sc_vqs[0].vq_num;
403 	txqsize = vsc->sc_vqs[1].vq_num;
404 
405 	/*
406 	 * For simplicity, we always allocate the full virtio_net_hdr size
407 	 * even if VIRTIO_NET_F_MRG_RXBUF is not negotiated and
408 	 * only a part of the memory is ever used.
409 	 */
410 	allocsize = sizeof(struct virtio_net_hdr) * txqsize;
411 
412 	if (vsc->sc_nvqs == 3) {
413 		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
414 		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
415 		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
416 		allocsize += VIO_CTRL_MAC_INFO_SIZE;
417 	}
418 	sc->sc_dma_size = allocsize;
419 
420 	if (vio_alloc_dmamem(sc) != 0) {
421 		printf("unable to allocate dma region\n");
422 		return  -1;
423 	}
424 
425 	kva = sc->sc_dma_kva;
426 	sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
427 	offset += sizeof(struct virtio_net_hdr) * txqsize;
428 	if (vsc->sc_nvqs == 3) {
429 		sc->sc_ctrl_cmd = (void*)(kva + offset);
430 		offset += sizeof(*sc->sc_ctrl_cmd);
431 		sc->sc_ctrl_status = (void*)(kva + offset);
432 		offset += sizeof(*sc->sc_ctrl_status);
433 		sc->sc_ctrl_rx = (void*)(kva + offset);
434 		offset += sizeof(*sc->sc_ctrl_rx);
435 		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
436 		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
437 		    ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
438 		sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
439 	}
440 
441 	sc->sc_arrays = mallocarray(rxqsize + txqsize,
442 	    2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF,
443 	    M_WAITOK | M_CANFAIL | M_ZERO);
444 	if (sc->sc_arrays == NULL) {
445 		printf("unable to allocate mem for dmamaps\n");
446 		goto err_hdr;
447 	}
448 	allocsize = (rxqsize + txqsize) *
449 	    (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));
450 
451 	sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
452 	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
453 	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
454 
455 	for (i = 0; i < rxqsize; i++) {
456 		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
457 		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
458 		if (r != 0)
459 			goto err_reqs;
460 	}
461 
462 	txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
463 	for (i = 0; i < txqsize; i++) {
464 		r = bus_dmamap_create(vsc->sc_dmat, txsize,
465 		    VIRTIO_NET_TX_MAXNSEGS, txsize, 0,
466 		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
467 		    &sc->sc_tx_dmamaps[i]);
468 		if (r != 0)
469 			goto err_reqs;
470 	}
471 
472 	return 0;
473 
474 err_reqs:
475 	printf("dmamap creation failed, error %d\n", r);
476 	for (i = 0; i < txqsize; i++) {
477 		if (sc->sc_tx_dmamaps[i])
478 			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
479 	}
480 	for (i = 0; i < rxqsize; i++) {
481 		if (sc->sc_rx_dmamaps[i])
482 			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
483 	}
484 	if (sc->sc_arrays) {
485 		free(sc->sc_arrays, M_DEVBUF, allocsize);
486 		sc->sc_arrays = 0;
487 	}
488 err_hdr:
489 	vio_free_dmamem(sc);
490 	return -1;
491 }
492 
493 void
494 vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
495 {
496 	int i;
497 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
498 		ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
499 		    VIRTIO_NET_CONFIG_MAC + i);
500 	}
501 }
502 
503 void
504 vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
505 {
506 	int i;
507 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
508 		virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
509 		     ac->ac_enaddr[i]);
510 	}
511 }
512 
513 void
514 vio_attach(struct device *parent, struct device *self, void *aux)
515 {
516 	struct vio_softc *sc = (struct vio_softc *)self;
517 	struct virtio_softc *vsc = (struct virtio_softc *)parent;
518 	int i;
519 	struct ifnet *ifp = &sc->sc_ac.ac_if;
520 
521 	if (vsc->sc_child != NULL) {
522 		printf(": child already attached for %s; something wrong...\n",
523 		    parent->dv_xname);
524 		return;
525 	}
526 
527 	sc->sc_virtio = vsc;
528 
529 	vsc->sc_child = self;
530 	vsc->sc_ipl = IPL_NET;
531 	vsc->sc_vqs = &sc->sc_vq[0];
532 	vsc->sc_config_change = 0;
533 	vsc->sc_driver_features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
534 	    VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX |
535 	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
536 	    VIRTIO_F_RING_EVENT_IDX;
537 
538 	virtio_negotiate_features(vsc, virtio_net_feature_names);
539 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
540 		vio_get_lladr(&sc->sc_ac, vsc);
541 	} else {
542 		ether_fakeaddr(ifp);
543 		vio_put_lladr(&sc->sc_ac, vsc);
544 	}
545 	printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
546 
547 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
548 	    vsc->sc_version_1) {
549 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
550 	} else {
551 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
552 	}
553 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
554 		ifp->if_hardmtu = 16000; /* arbitrary limit */
555 	else
556 		ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
557 
558 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
559 		goto err;
560 	vsc->sc_nvqs = 1;
561 	sc->sc_vq[VQRX].vq_done = vio_rx_intr;
562 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
563 	    sc->sc_hdr_size + ifp->if_hardmtu + ETHER_HDR_LEN,
564 	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
565 		goto err;
566 	}
567 	vsc->sc_nvqs = 2;
568 	sc->sc_vq[VQTX].vq_done = vio_tx_intr;
569 	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
570 	if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
571 		virtio_postpone_intr_far(&sc->sc_vq[VQTX]);
572 	else
573 		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
574 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)
575 	    && virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_RX)) {
576 		if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1,
577 		    "control") == 0) {
578 			sc->sc_vq[VQCTL].vq_done = vio_ctrleof;
579 			virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
580 			vsc->sc_nvqs = 3;
581 		}
582 	}
583 
584 	if (vio_alloc_mem(sc) < 0)
585 		goto err;
586 
587 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
588 	ifp->if_softc = sc;
589 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
590 	ifp->if_start = vio_start;
591 	ifp->if_ioctl = vio_ioctl;
592 	ifp->if_capabilities = IFCAP_VLAN_MTU;
593 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
594 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4;
595 	ifq_set_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
596 	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
597 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
598 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
599 	vsc->sc_config_change = vio_config_change;
600 	timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]);
601 	timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]);
602 
603 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
604 	if_attach(ifp);
605 	ether_ifattach(ifp);
606 
607 	return;
608 
609 err:
610 	for (i = 0; i < vsc->sc_nvqs; i++)
611 		virtio_free_vq(vsc, &sc->sc_vq[i]);
612 	vsc->sc_nvqs = 0;
613 	vsc->sc_child = VIRTIO_CHILD_ERROR;
614 	return;
615 }
616 
617 /* check link status */
618 void
619 vio_link_state(struct ifnet *ifp)
620 {
621 	struct vio_softc *sc = ifp->if_softc;
622 	struct virtio_softc *vsc = sc->sc_virtio;
623 	int link_state = LINK_STATE_FULL_DUPLEX;
624 
625 	if (virtio_has_feature(vsc, VIRTIO_NET_F_STATUS)) {
626 		int status = virtio_read_device_config_2(vsc,
627 		    VIRTIO_NET_CONFIG_STATUS);
628 		if (!(status & VIRTIO_NET_S_LINK_UP))
629 			link_state = LINK_STATE_DOWN;
630 	}
631 	if (ifp->if_link_state != link_state) {
632 		ifp->if_link_state = link_state;
633 		if_link_state_change(ifp);
634 	}
635 }
636 
637 int
638 vio_config_change(struct virtio_softc *vsc)
639 {
640 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
641 	vio_link_state(&sc->sc_ac.ac_if);
642 	return 1;
643 }
644 
/*
 * ifmedia change callback.  Media change requests are ignored, so this
 * does nothing and reports success (0).
 */
int
vio_media_change(struct ifnet *ifp)
{
	return (0);
}
651 
652 void
653 vio_media_status(struct ifnet *ifp, struct ifmediareq *imr)
654 {
655 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
656 	imr->ifm_status = IFM_AVALID;
657 
658 	vio_link_state(ifp);
659 	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP)
660 		imr->ifm_status |= IFM_ACTIVE|IFM_FDX;
661 }
662 
663 /*
664  * Interface functions for ifnet
665  */
666 int
667 vio_init(struct ifnet *ifp)
668 {
669 	struct vio_softc *sc = ifp->if_softc;
670 
671 	vio_stop(ifp, 0);
672 	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
673 	    sc->sc_vq[VQRX].vq_num);
674 	vio_populate_rx_mbufs(sc);
675 	ifp->if_flags |= IFF_RUNNING;
676 	ifq_clr_oactive(&ifp->if_snd);
677 	vio_iff(sc);
678 	vio_link_state(ifp);
679 	return 0;
680 }
681 
682 void
683 vio_stop(struct ifnet *ifp, int disable)
684 {
685 	struct vio_softc *sc = ifp->if_softc;
686 	struct virtio_softc *vsc = sc->sc_virtio;
687 
688 	timeout_del(&sc->sc_txtick);
689 	timeout_del(&sc->sc_rxtick);
690 	ifp->if_flags &= ~IFF_RUNNING;
691 	ifq_clr_oactive(&ifp->if_snd);
692 	/* only way to stop I/O and DMA is resetting... */
693 	virtio_reset(vsc);
694 	vio_rxeof(sc);
695 	if (vsc->sc_nvqs >= 3)
696 		vio_ctrleof(&sc->sc_vq[VQCTL]);
697 	vio_tx_drain(sc);
698 	if (disable)
699 		vio_rx_drain(sc);
700 
701 	virtio_reinit_start(vsc);
702 	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
703 	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
704 	if (vsc->sc_nvqs >= 3)
705 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
706 	virtio_reinit_end(vsc);
707 	if (vsc->sc_nvqs >= 3) {
708 		if (sc->sc_ctrl_inuse != FREE)
709 			sc->sc_ctrl_inuse = RESET;
710 		wakeup(&sc->sc_ctrl_inuse);
711 	}
712 }
713 
714 void
715 vio_start(struct ifnet *ifp)
716 {
717 	struct vio_softc *sc = ifp->if_softc;
718 	struct virtio_softc *vsc = sc->sc_virtio;
719 	struct virtqueue *vq = &sc->sc_vq[VQTX];
720 	struct mbuf *m;
721 	int queued = 0;
722 
723 	vio_txeof(vq);
724 
725 	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
726 		return;
727 	if (ifq_empty(&ifp->if_snd))
728 		return;
729 
730 again:
731 	for (;;) {
732 		int slot, r;
733 		struct virtio_net_hdr *hdr;
734 
735 		m = ifq_deq_begin(&ifp->if_snd);
736 		if (m == NULL)
737 			break;
738 
739 		r = virtio_enqueue_prep(vq, &slot);
740 		if (r == EAGAIN) {
741 			ifq_deq_rollback(&ifp->if_snd, m);
742 			ifq_set_oactive(&ifp->if_snd);
743 			break;
744 		}
745 		if (r != 0)
746 			panic("enqueue_prep for a tx buffer: %d", r);
747 
748 		hdr = &sc->sc_tx_hdrs[slot];
749 		memset(hdr, 0, sc->sc_hdr_size);
750 		if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
751 			struct mbuf *mip;
752 			struct ip *ip;
753 			int ehdrlen = ETHER_HDR_LEN;
754 			int ipoff;
755 #if NVLAN > 0
756 			struct ether_vlan_header *eh;
757 
758 			eh = mtod(m, struct ether_vlan_header *);
759 			if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
760 				ehdrlen += ETHER_VLAN_ENCAP_LEN;
761 #endif
762 
763 			if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
764 				hdr->csum_offset = offsetof(struct tcphdr, th_sum);
765 			else
766 				hdr->csum_offset = offsetof(struct udphdr, uh_sum);
767 
768 			mip = m_getptr(m, ehdrlen, &ipoff);
769 			KASSERT(mip != NULL && mip->m_len - ipoff >= sizeof(*ip));
770 			ip = (struct ip *)(mip->m_data + ipoff);
771 			hdr->csum_start = ehdrlen + (ip->ip_hl << 2);
772 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
773 		}
774 
775 		r = vio_encap(sc, slot, m);
776 		if (r != 0) {
777 			virtio_enqueue_abort(vq, slot);
778 			ifq_deq_commit(&ifp->if_snd, m);
779 			m_freem(m);
780 			ifp->if_oerrors++;
781 			continue;
782 		}
783 		r = virtio_enqueue_reserve(vq, slot,
784 		    sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
785 		if (r != 0) {
786 			bus_dmamap_unload(vsc->sc_dmat,
787 			    sc->sc_tx_dmamaps[slot]);
788 			ifq_deq_rollback(&ifp->if_snd, m);
789 			sc->sc_tx_mbufs[slot] = NULL;
790 			ifq_set_oactive(&ifp->if_snd);
791 			break;
792 		}
793 		ifq_deq_commit(&ifp->if_snd, m);
794 
795 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
796 		    sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
797 		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
798 		    BUS_DMASYNC_PREWRITE);
799 		VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sc->sc_hdr_size, 1);
800 		virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
801 		virtio_enqueue_commit(vsc, vq, slot, 0);
802 		queued++;
803 #if NBPFILTER > 0
804 		if (ifp->if_bpf)
805 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
806 #endif
807 	}
808 	if (ifq_is_oactive(&ifp->if_snd)) {
809 		int r;
810 		if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
811 			r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
812 		else
813 			r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
814 		if (r) {
815 			vio_txeof(vq);
816 			goto again;
817 		}
818 	}
819 
820 	if (queued > 0) {
821 		virtio_notify(vsc, vq);
822 		timeout_add_sec(&sc->sc_txtick, 1);
823 	}
824 }
825 
#if VIRTIO_DEBUG
/*
 * Debug aid (VIRTIO_DEBUG only): print the state of the tx and rx
 * virtqueues, of both tick timeouts and, when the control virtqueue
 * exists, of that queue and sc_ctrl_inuse.
 */
void
vio_dump(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	printf("%s status dump:\n", ifp->if_xname);

	printf("TX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQTX]);
	printf("tx tick active: %d\n", !timeout_triggered(&sc->sc_txtick));
	printf("rx tick active: %d\n", !timeout_triggered(&sc->sc_rxtick));

	printf("RX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQRX]);

	if (vsc->sc_nvqs != 3)
		return;

	printf("CTL virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQCTL]);
	printf("ctrl_inuse: %d\n", sc->sc_ctrl_inuse);
}
#endif
847 
848 int
849 vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
850 {
851 	struct vio_softc *sc = ifp->if_softc;
852 	struct ifreq *ifr = (struct ifreq *)data;
853 	int s, r = 0;
854 
855 	s = splnet();
856 	switch (cmd) {
857 	case SIOCSIFADDR:
858 		ifp->if_flags |= IFF_UP;
859 		if (!(ifp->if_flags & IFF_RUNNING))
860 			vio_init(ifp);
861 		break;
862 	case SIOCSIFFLAGS:
863 		if (ifp->if_flags & IFF_UP) {
864 #if VIRTIO_DEBUG
865 			if (ifp->if_flags & IFF_DEBUG)
866 				vio_dump(sc);
867 #endif
868 			if (ifp->if_flags & IFF_RUNNING)
869 				r = ENETRESET;
870 			else
871 				vio_init(ifp);
872 		} else {
873 			if (ifp->if_flags & IFF_RUNNING)
874 				vio_stop(ifp, 1);
875 		}
876 		break;
877 	case SIOCGIFMEDIA:
878 	case SIOCSIFMEDIA:
879 		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
880 		break;
881 	case SIOCGIFRXR:
882 		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
883 		    NULL, MCLBYTES, &sc->sc_rx_ring);
884 		break;
885 	default:
886 		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
887 	}
888 
889 	if (r == ENETRESET) {
890 		if (ifp->if_flags & IFF_RUNNING)
891 			vio_iff(sc);
892 		r = 0;
893 	}
894 	splx(s);
895 	return r;
896 }
897 
898 /*
899  * Receive implementation
900  */
901 /* allocate and initialize a mbuf for receive */
902 int
903 vio_add_rx_mbuf(struct vio_softc *sc, int i)
904 {
905 	struct mbuf *m;
906 	int r;
907 
908 	m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES);
909 	if (m == NULL)
910 		return ENOBUFS;
911 	sc->sc_rx_mbufs[i] = m;
912 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
913 	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
914 	    m, BUS_DMA_READ|BUS_DMA_NOWAIT);
915 	if (r) {
916 		m_freem(m);
917 		sc->sc_rx_mbufs[i] = NULL;
918 		return r;
919 	}
920 
921 	return 0;
922 }
923 
924 /* free a mbuf for receive */
925 void
926 vio_free_rx_mbuf(struct vio_softc *sc, int i)
927 {
928 	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
929 	m_freem(sc->sc_rx_mbufs[i]);
930 	sc->sc_rx_mbufs[i] = NULL;
931 }
932 
933 /* add mbufs for all the empty receive slots */
934 void
935 vio_populate_rx_mbufs(struct vio_softc *sc)
936 {
937 	struct virtio_softc *vsc = sc->sc_virtio;
938 	int r, done = 0;
939 	u_int slots;
940 	struct virtqueue *vq = &sc->sc_vq[VQRX];
941 	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
942 
943 	for (slots = if_rxr_get(&sc->sc_rx_ring, vq->vq_num);
944 	    slots > 0; slots--) {
945 		int slot;
946 		r = virtio_enqueue_prep(vq, &slot);
947 		if (r == EAGAIN)
948 			break;
949 		if (r != 0)
950 			panic("enqueue_prep for rx buffers: %d", r);
951 		if (sc->sc_rx_mbufs[slot] == NULL) {
952 			r = vio_add_rx_mbuf(sc, slot);
953 			if (r != 0) {
954 				virtio_enqueue_abort(vq, slot);
955 				break;
956 			}
957 		}
958 		r = virtio_enqueue_reserve(vq, slot,
959 		    sc->sc_rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
960 		if (r != 0) {
961 			vio_free_rx_mbuf(sc, slot);
962 			break;
963 		}
964 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
965 		    MCLBYTES, BUS_DMASYNC_PREREAD);
966 		if (mrg_rxbuf) {
967 			virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
968 		} else {
969 			/*
970 			 * Buggy kvm wants a buffer of exactly the size of
971 			 * the header in this case, so we have to split in
972 			 * two.
973 			 */
974 			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
975 			    0, sc->sc_hdr_size, 0);
976 			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
977 			    sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0);
978 		}
979 		virtio_enqueue_commit(vsc, vq, slot, 0);
980 		done = 1;
981 	}
982 	if_rxr_put(&sc->sc_rx_ring, slots);
983 
984 	if (done)
985 		virtio_notify(vsc, vq);
986 	timeout_add_sec(&sc->sc_rxtick, 1);
987 }
988 
989 /* dequeue received packets */
990 int
991 vio_rxeof(struct vio_softc *sc)
992 {
993 	struct virtio_softc *vsc = sc->sc_virtio;
994 	struct virtqueue *vq = &sc->sc_vq[VQRX];
995 	struct ifnet *ifp = &sc->sc_ac.ac_if;
996 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
997 	struct mbuf *m, *m0 = NULL, *mlast;
998 	int r = 0;
999 	int slot, len, bufs_left;
1000 	struct virtio_net_hdr *hdr;
1001 
1002 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1003 		r = 1;
1004 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
1005 		    MCLBYTES, BUS_DMASYNC_POSTREAD);
1006 		m = sc->sc_rx_mbufs[slot];
1007 		KASSERT(m != NULL);
1008 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
1009 		sc->sc_rx_mbufs[slot] = NULL;
1010 		virtio_dequeue_commit(vq, slot);
1011 		if_rxr_put(&sc->sc_rx_ring, 1);
1012 		m->m_len = m->m_pkthdr.len = len;
1013 		m->m_pkthdr.csum_flags = 0;
1014 		if (m0 == NULL) {
1015 			hdr = mtod(m, struct virtio_net_hdr *);
1016 			m_adj(m, sc->sc_hdr_size);
1017 			m0 = mlast = m;
1018 			if (VIO_HAVE_MRG_RXBUF(sc))
1019 				bufs_left = hdr->num_buffers - 1;
1020 			else
1021 				bufs_left = 0;
1022 		} else {
1023 			m->m_flags &= ~M_PKTHDR;
1024 			m0->m_pkthdr.len += m->m_len;
1025 			mlast->m_next = m;
1026 			mlast = m;
1027 			bufs_left--;
1028 		}
1029 
1030 		if (bufs_left == 0) {
1031 			ml_enqueue(&ml, m0);
1032 			m0 = NULL;
1033 		}
1034 	}
1035 	if (m0 != NULL) {
1036 		DPRINTF("%s: expected %d buffers, got %d\n", __func__,
1037 		    (int)hdr->num_buffers,
1038 		    (int)hdr->num_buffers - bufs_left);
1039 		ifp->if_ierrors++;
1040 		m_freem(m0);
1041 	}
1042 
1043 	if (ifiq_input(&ifp->if_rcv, &ml))
1044 		if_rxr_livelocked(&sc->sc_rx_ring);
1045 
1046 	return r;
1047 }
1048 
1049 int
1050 vio_rx_intr(struct virtqueue *vq)
1051 {
1052 	struct virtio_softc *vsc = vq->vq_owner;
1053 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1054 	int r, sum = 0;
1055 
1056 again:
1057 	r = vio_rxeof(sc);
1058 	sum += r;
1059 	if (r) {
1060 		vio_populate_rx_mbufs(sc);
1061 		/* set used event index to the next slot */
1062 		if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) {
1063 			if (virtio_start_vq_intr(vq->vq_owner, vq))
1064 				goto again;
1065 		}
1066 	}
1067 
1068 	return sum;
1069 }
1070 
1071 void
1072 vio_rxtick(void *arg)
1073 {
1074 	struct virtqueue *vq = arg;
1075 	struct virtio_softc *vsc = vq->vq_owner;
1076 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1077 	int s;
1078 
1079 	s = splnet();
1080 	vio_populate_rx_mbufs(sc);
1081 	splx(s);
1082 }
1083 
1084 /* free all the mbufs; called from if_stop(disable) */
1085 void
1086 vio_rx_drain(struct vio_softc *sc)
1087 {
1088 	struct virtqueue *vq = &sc->sc_vq[VQRX];
1089 	int i;
1090 
1091 	for (i = 0; i < vq->vq_num; i++) {
1092 		if (sc->sc_rx_mbufs[i] == NULL)
1093 			continue;
1094 		vio_free_rx_mbuf(sc, i);
1095 	}
1096 }
1097 
1098 /*
1099  * Transmission implementation
1100  */
1101 /* actual transmission is done in if_start */
1102 /* tx interrupt; dequeue and free mbufs */
1103 /*
1104  * tx interrupt is actually disabled unless the tx queue is full, i.e.
1105  * IFF_OACTIVE is set. vio_txtick is used to make sure that mbufs
1106  * are dequeued and freed even if no further transfer happens.
1107  */
1108 int
1109 vio_tx_intr(struct virtqueue *vq)
1110 {
1111 	struct virtio_softc *vsc = vq->vq_owner;
1112 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1113 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1114 	int r;
1115 
1116 	r = vio_txeof(vq);
1117 	vio_start(ifp);
1118 	return r;
1119 }
1120 
/*
 * Tx timeout handler: run the tx completion path at splnet.
 * arg is the tx virtqueue.
 */
void
vio_txtick(void *arg)
{
	struct virtqueue *txq = arg;
	int s;

	s = splnet();
	vio_tx_intr(txq);
	splx(s);
}
1129 
1130 int
1131 vio_txeof(struct virtqueue *vq)
1132 {
1133 	struct virtio_softc *vsc = vq->vq_owner;
1134 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1135 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1136 	struct mbuf *m;
1137 	int r = 0;
1138 	int slot, len;
1139 
1140 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1141 		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
1142 		r++;
1143 		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
1144 		    BUS_DMASYNC_POSTWRITE);
1145 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
1146 		    sc->sc_tx_dmamaps[slot]->dm_mapsize,
1147 		    BUS_DMASYNC_POSTWRITE);
1148 		m = sc->sc_tx_mbufs[slot];
1149 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
1150 		sc->sc_tx_mbufs[slot] = NULL;
1151 		virtio_dequeue_commit(vq, slot);
1152 		m_freem(m);
1153 	}
1154 
1155 	if (r) {
1156 		ifq_clr_oactive(&ifp->if_snd);
1157 		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
1158 	}
1159 	if (vq->vq_used_idx == vq->vq_avail_idx)
1160 		timeout_del(&sc->sc_txtick);
1161 	else if (r)
1162 		timeout_add_sec(&sc->sc_txtick, 1);
1163 	return r;
1164 }
1165 
1166 int
1167 vio_encap(struct vio_softc *sc, int slot, struct mbuf *m)
1168 {
1169 	struct virtio_softc	*vsc = sc->sc_virtio;
1170 	bus_dmamap_t		 dmap= sc->sc_tx_dmamaps[slot];
1171 	int			 r;
1172 
1173 	r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
1174 	    BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1175 	switch (r) {
1176 	case 0:
1177 		break;
1178 	case EFBIG:
1179 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1180 		    bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
1181 		    BUS_DMA_WRITE|BUS_DMA_NOWAIT) == 0)
1182 			break;
1183 
1184 		/* FALLTHROUGH */
1185 	default:
1186 		return ENOBUFS;
1187 	}
1188 	sc->sc_tx_mbufs[slot] = m;
1189 	return 0;
1190 }
1191 
1192 /* free all the mbufs already put on vq; called from if_stop(disable) */
1193 void
1194 vio_tx_drain(struct vio_softc *sc)
1195 {
1196 	struct virtio_softc *vsc = sc->sc_virtio;
1197 	struct virtqueue *vq = &sc->sc_vq[VQTX];
1198 	int i;
1199 
1200 	for (i = 0; i < vq->vq_num; i++) {
1201 		if (sc->sc_tx_mbufs[i] == NULL)
1202 			continue;
1203 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
1204 		m_freem(sc->sc_tx_mbufs[i]);
1205 		sc->sc_tx_mbufs[i] = NULL;
1206 	}
1207 }
1208 
1209 /*
1210  * Control vq
1211  */
1212 /* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1213 int
1214 vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
1215 {
1216 	struct virtio_softc *vsc = sc->sc_virtio;
1217 	struct virtqueue *vq = &sc->sc_vq[VQCTL];
1218 	int r, slot;
1219 
1220 	splassert(IPL_NET);
1221 
1222 	if ((r = vio_wait_ctrl(sc)) != 0)
1223 		return r;
1224 
1225 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
1226 	sc->sc_ctrl_cmd->command = cmd;
1227 	sc->sc_ctrl_rx->onoff = onoff;
1228 
1229 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1230 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1231 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
1232 	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
1233 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1234 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
1235 
1236 	r = virtio_enqueue_prep(vq, &slot);
1237 	if (r != 0)
1238 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1239 	r = virtio_enqueue_reserve(vq, slot, 3);
1240 	if (r != 0)
1241 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1242 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
1243 	    sizeof(*sc->sc_ctrl_cmd), 1);
1244 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
1245 	    sizeof(*sc->sc_ctrl_rx), 1);
1246 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
1247 	    sizeof(*sc->sc_ctrl_status), 0);
1248 	virtio_enqueue_commit(vsc, vq, slot, 1);
1249 
1250 	if ((r = vio_wait_ctrl_done(sc)) != 0)
1251 		goto out;
1252 
1253 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1254 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1255 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
1256 	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
1257 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1258 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
1259 
1260 	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
1261 		r = 0;
1262 	} else {
1263 		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
1264 		r = EIO;
1265 	}
1266 
1267 	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
1268 out:
1269 	vio_ctrl_wakeup(sc, FREE);
1270 	return r;
1271 }
1272 
1273 /*
1274  * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
1275  * NET_LOCK() deadlocks are possible.  So release it here.
1276  */
1277 static inline int
1278 vio_sleep(struct vio_softc *sc, const char *wmesg)
1279 {
1280 	int status = rw_status(&netlock);
1281 
1282 	if (status != RW_WRITE && status != RW_READ)
1283 		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
1284 		    INFSLP);
1285 
1286 	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
1287 	    INFSLP);
1288 }
1289 
1290 int
1291 vio_wait_ctrl(struct vio_softc *sc)
1292 {
1293 	int r = 0;
1294 
1295 	while (sc->sc_ctrl_inuse != FREE) {
1296 		r = vio_sleep(sc, "viowait");
1297 		if (r == EINTR)
1298 			return r;
1299 	}
1300 	sc->sc_ctrl_inuse = INUSE;
1301 
1302 	return r;
1303 }
1304 
1305 int
1306 vio_wait_ctrl_done(struct vio_softc *sc)
1307 {
1308 	int r = 0;
1309 
1310 	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
1311 		if (sc->sc_ctrl_inuse == RESET) {
1312 			r = 1;
1313 			break;
1314 		}
1315 		r = vio_sleep(sc, "viodone");
1316 		if (r == EINTR)
1317 			break;
1318 	}
1319 	return r;
1320 }
1321 
1322 void
1323 vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
1324 {
1325 	sc->sc_ctrl_inuse = new;
1326 	wakeup(&sc->sc_ctrl_inuse);
1327 }
1328 
1329 int
1330 vio_ctrleof(struct virtqueue *vq)
1331 {
1332 	struct virtio_softc *vsc = vq->vq_owner;
1333 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1334 	int r = 0, ret, slot;
1335 
1336 again:
1337 	ret = virtio_dequeue(vsc, vq, &slot, NULL);
1338 	if (ret == ENOENT)
1339 		return r;
1340 	virtio_dequeue_commit(vq, slot);
1341 	r++;
1342 	vio_ctrl_wakeup(sc, DONE);
1343 	if (virtio_start_vq_intr(vsc, vq))
1344 		goto again;
1345 
1346 	return r;
1347 }
1348 
1349 /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1350 int
1351 vio_set_rx_filter(struct vio_softc *sc)
1352 {
1353 	/* filter already set in sc_ctrl_mac_tbl */
1354 	struct virtio_softc *vsc = sc->sc_virtio;
1355 	struct virtqueue *vq = &sc->sc_vq[VQCTL];
1356 	int r, slot;
1357 
1358 	splassert(IPL_NET);
1359 
1360 	if ((r = vio_wait_ctrl(sc)) != 0)
1361 		return r;
1362 
1363 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
1364 	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1365 
1366 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1367 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1368 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
1369 	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
1370 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1371 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
1372 
1373 	r = virtio_enqueue_prep(vq, &slot);
1374 	if (r != 0)
1375 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1376 	r = virtio_enqueue_reserve(vq, slot, 4);
1377 	if (r != 0)
1378 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1379 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
1380 	    sizeof(*sc->sc_ctrl_cmd), 1);
1381 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
1382 	    sizeof(*sc->sc_ctrl_mac_tbl_uc) +
1383 	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
1384 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
1385 	    sizeof(*sc->sc_ctrl_mac_tbl_mc) +
1386 	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
1387 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
1388 	    sizeof(*sc->sc_ctrl_status), 0);
1389 	virtio_enqueue_commit(vsc, vq, slot, 1);
1390 
1391 	if ((r = vio_wait_ctrl_done(sc)) != 0)
1392 		goto out;
1393 
1394 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1395 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1396 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
1397 	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
1398 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1399 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
1400 
1401 	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
1402 		r = 0;
1403 	} else {
1404 		/* The host's filter table is not large enough */
1405 		printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
1406 		r = EIO;
1407 	}
1408 
1409 out:
1410 	vio_ctrl_wakeup(sc, FREE);
1411 	return r;
1412 }
1413 
1414 void
1415 vio_iff(struct vio_softc *sc)
1416 {
1417 	struct virtio_softc *vsc = sc->sc_virtio;
1418 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1419 	struct arpcom *ac = &sc->sc_ac;
1420 	struct ether_multi *enm;
1421 	struct ether_multistep step;
1422 	int nentries = 0;
1423 	int promisc = 0, allmulti = 0, rxfilter = 0;
1424 	int r;
1425 
1426 	splassert(IPL_NET);
1427 
1428 	ifp->if_flags &= ~IFF_ALLMULTI;
1429 
1430 	if (vsc->sc_nvqs < 3) {
1431 		/* no ctrl vq; always promisc */
1432 		ifp->if_flags |= IFF_ALLMULTI | IFF_PROMISC;
1433 		return;
1434 	}
1435 
1436 	if (sc->sc_dev.dv_cfdata->cf_flags & CONFFLAG_QEMU_VLAN_BUG)
1437 		ifp->if_flags |= IFF_PROMISC;
1438 
1439 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1440 	    ac->ac_multicnt >= VIRTIO_NET_CTRL_MAC_MC_ENTRIES) {
1441 		ifp->if_flags |= IFF_ALLMULTI;
1442 		if (ifp->if_flags & IFF_PROMISC)
1443 			promisc = 1;
1444 		else
1445 			allmulti = 1;
1446 	} else {
1447 		rxfilter = 1;
1448 
1449 		ETHER_FIRST_MULTI(step, ac, enm);
1450 		while (enm != NULL) {
1451 			memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries++],
1452 			    enm->enm_addrlo, ETHER_ADDR_LEN);
1453 
1454 			ETHER_NEXT_MULTI(step, enm);
1455 		}
1456 	}
1457 
1458 	/* set unicast address, VirtualBox wants that */
1459 	memcpy(sc->sc_ctrl_mac_tbl_uc->macs[0], ac->ac_enaddr, ETHER_ADDR_LEN);
1460 	sc->sc_ctrl_mac_tbl_uc->nentries = 1;
1461 
1462 	sc->sc_ctrl_mac_tbl_mc->nentries = rxfilter ? nentries : 0;
1463 
1464 	if (vsc->sc_nvqs < 3)
1465 		return;
1466 
1467 	r = vio_set_rx_filter(sc);
1468 	if (r == EIO)
1469 		allmulti = 1; /* fallback */
1470 	else if (r != 0)
1471 		return;
1472 
1473 	r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, allmulti);
1474 	if (r == EIO)
1475 		promisc = 1; /* fallback */
1476 	else if (r != 0)
1477 		return;
1478 
1479 	vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
1480 }
1481