/*	$OpenBSD: if_vio.c,v 1.10 2019/03/24 18:22:36 sf Exp $	*/

/*
 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bpfilter.h"
#include "vlan.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/timeout.h>

#include <dev/pv/virtioreg.h>
#include <dev/pv/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if VIRTIO_DEBUG
#define DPRINTF(x...) printf(x)
#else
#define DPRINTF(x...)
#endif

/*
 * if_vioreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM			(1ULL<<0)
#define VIRTIO_NET_F_GUEST_CSUM			(1ULL<<1)
#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS	(1ULL<<2)
#define VIRTIO_NET_F_MTU			(1ULL<<3)
#define VIRTIO_NET_F_MAC			(1ULL<<5)
#define VIRTIO_NET_F_GSO			(1ULL<<6)
#define VIRTIO_NET_F_GUEST_TSO4			(1ULL<<7)
#define VIRTIO_NET_F_GUEST_TSO6			(1ULL<<8)
#define VIRTIO_NET_F_GUEST_ECN			(1ULL<<9)
#define VIRTIO_NET_F_GUEST_UFO			(1ULL<<10)
#define VIRTIO_NET_F_HOST_TSO4			(1ULL<<11)
#define VIRTIO_NET_F_HOST_TSO6			(1ULL<<12)
#define VIRTIO_NET_F_HOST_ECN			(1ULL<<13)
#define VIRTIO_NET_F_HOST_UFO			(1ULL<<14)
#define VIRTIO_NET_F_MRG_RXBUF			(1ULL<<15)
#define VIRTIO_NET_F_STATUS			(1ULL<<16)
#define VIRTIO_NET_F_CTRL_VQ			(1ULL<<17)
#define VIRTIO_NET_F_CTRL_RX			(1ULL<<18)
#define VIRTIO_NET_F_CTRL_VLAN			(1ULL<<19)
#define VIRTIO_NET_F_CTRL_RX_EXTRA		(1ULL<<20)
#define VIRTIO_NET_F_GUEST_ANNOUNCE		(1ULL<<21)
#define VIRTIO_NET_F_MQ				(1ULL<<22)
#define VIRTIO_NET_F_CTRL_MAC_ADDR		(1ULL<<23)

/*
 * Config(8) flags. The lowest byte is reserved for generic virtio stuff.
 */

/* Workaround for vlan related bug in qemu < version 2.0 */
#define CONFFLAG_QEMU_VLAN_BUG		(1<<8)

static const struct virtio_feature_name virtio_net_feature_names[] = {
#if VIRTIO_DEBUG
	{ VIRTIO_NET_F_CSUM,			"CSum" },
	{ VIRTIO_NET_F_GUEST_CSUM,		"GuestCSum" },
	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlGuestOffl" },
	{ VIRTIO_NET_F_MTU,			"MTU" },
	{ VIRTIO_NET_F_MAC,			"MAC" },
	{ VIRTIO_NET_F_GSO,			"GSO" },
	{ VIRTIO_NET_F_GUEST_TSO4,		"GuestTSO4" },
	{ VIRTIO_NET_F_GUEST_TSO6,		"GuestTSO6" },
	{ VIRTIO_NET_F_GUEST_ECN,		"GuestECN" },
	{ VIRTIO_NET_F_GUEST_UFO,		"GuestUFO" },
	{ VIRTIO_NET_F_HOST_TSO4,		"HostTSO4" },
	{ VIRTIO_NET_F_HOST_TSO6,		"HostTSO6" },
	{ VIRTIO_NET_F_HOST_ECN,		"HostECN" },
	{ VIRTIO_NET_F_HOST_UFO,		"HostUFO" },
	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRXBuf" },
	{ VIRTIO_NET_F_STATUS,			"Status" },
	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVQ" },
	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRX" },
	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLAN" },
	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRXExtra" },
	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce" },
	{ VIRTIO_NET_F_MQ,			"MQ" },
	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMAC" },
#endif
	{ 0,					NULL }
};

/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;

	/* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */
	uint16_t	num_buffers;
} __packed;

#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)

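/*
 * Illustrative sketch of the header that vio_start() below builds for a
 * frame with M_TCP_CSUM_OUT set, assuming an untagged IPv4 packet with a
 * 20 byte IP header:
 *
 *	hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 *	hdr->csum_start = ETHER_HDR_LEN + 20;	(start of the TCP header)
 *	hdr->csum_offset = offsetof(struct tcphdr, th_sum);
 *
 * The host then checksums from csum_start to the end of the frame and
 * stores the result at csum_start + csum_offset.
 */
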
/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

/*
 * if_viovar.h:
 */
enum vio_ctrl_state {
	FREE, INUSE, DONE, RESET
};

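/*
 * Control command life cycle (sketch, derived from the code below):
 *
 *	FREE --vio_wait_ctrl()--> INUSE --vio_ctrleof()--> DONE
 *	  ^                                                  |
 *	  +---------------vio_ctrl_wakeup(FREE)--------------+
 *
 * vio_stop() moves a non-FREE state to RESET to abort the pending
 * command; vio_wait_ctrl_done() reports that to the caller.
 */
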
struct vio_softc {
	struct device		sc_dev;

	struct virtio_softc	*sc_virtio;
#define	VQRX	0
#define	VQTX	1
#define	VQCTL	2
	struct virtqueue	sc_vq[3];

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	short			sc_ifflags;

	/* bus_dmamem */
	bus_dma_segment_t	sc_dma_seg;
	bus_dmamap_t		sc_dma_map;
	size_t			sc_dma_size;
	caddr_t			sc_dma_kva;

	int			sc_hdr_size;
	struct virtio_net_hdr	*sc_tx_hdrs;
	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
	struct virtio_net_ctrl_status *sc_ctrl_status;
	struct virtio_net_ctrl_rx *sc_ctrl_rx;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
#define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;

	/* kmem */
	bus_dmamap_t		*sc_arrays;
#define sc_rx_dmamaps sc_arrays
	bus_dmamap_t		*sc_tx_dmamaps;
	struct mbuf		**sc_rx_mbufs;
	struct mbuf		**sc_tx_mbufs;
	struct if_rxring	sc_rx_ring;

	enum vio_ctrl_state	sc_ctrl_inuse;

	struct timeout		sc_txtick, sc_rxtick;
};

#define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)
#define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)		\
	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
#define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write)	\
	virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))
#define VIO_HAVE_MRG_RXBUF(sc)					\
	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))

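/*
 * Usage sketch for the helpers above (illustrative; this is what
 * vio_ctrl_rx() below actually does): sync one of the preallocated
 * structures, then enqueue it via its offset inside the single dma map:
 *
 *	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
 *	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
 *	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
 *	    sizeof(*sc->sc_ctrl_rx), 1);
 */
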
#define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
#define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
#define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */

#define VIO_CTRL_MAC_INFO_SIZE					\
	(2*sizeof(struct virtio_net_ctrl_mac_tbl) +		\
	 (VIRTIO_NET_CTRL_MAC_MC_ENTRIES +			\
	  VIRTIO_NET_CTRL_MAC_UC_ENTRIES) * ETHER_ADDR_LEN)

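/*
 * With the defaults above this comes out to 2 * 4 + (64 + 1) * 6 = 398
 * bytes: nentries is a uint32_t and the flexible macs[][] array adds no
 * size of its own.
 */
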
/* cfattach interface functions */
int	vio_match(struct device *, void *, void *);
void	vio_attach(struct device *, struct device *, void *);

/* ifnet interface functions */
int	vio_init(struct ifnet *);
void	vio_stop(struct ifnet *, int);
void	vio_start(struct ifnet *);
int	vio_ioctl(struct ifnet *, u_long, caddr_t);
void	vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
void	vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);

/* rx */
int	vio_add_rx_mbuf(struct vio_softc *, int);
void	vio_free_rx_mbuf(struct vio_softc *, int);
void	vio_populate_rx_mbufs(struct vio_softc *);
int	vio_rxeof(struct vio_softc *);
int	vio_rx_intr(struct virtqueue *);
void	vio_rx_drain(struct vio_softc *);
void	vio_rxtick(void *);

/* tx */
int	vio_tx_intr(struct virtqueue *);
int	vio_txeof(struct virtqueue *);
void	vio_tx_drain(struct vio_softc *);
int	vio_encap(struct vio_softc *, int, struct mbuf *);
void	vio_txtick(void *);

/* other control */
void	vio_link_state(struct ifnet *);
int	vio_config_change(struct virtio_softc *);
int	vio_ctrl_rx(struct vio_softc *, int, int);
int	vio_set_rx_filter(struct vio_softc *);
void	vio_iff(struct vio_softc *);
int	vio_media_change(struct ifnet *);
void	vio_media_status(struct ifnet *, struct ifmediareq *);
int	vio_ctrleof(struct virtqueue *);
int	vio_wait_ctrl(struct vio_softc *sc);
int	vio_wait_ctrl_done(struct vio_softc *sc);
void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
int	vio_alloc_mem(struct vio_softc *);
int	vio_alloc_dmamem(struct vio_softc *);
void	vio_free_dmamem(struct vio_softc *);

#if VIRTIO_DEBUG
void	vio_dump(struct vio_softc *);
#endif

int
vio_match(struct device *parent, void *match, void *aux)
{
	struct virtio_softc *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

struct cfattach vio_ca = {
	sizeof(struct vio_softc), vio_match, vio_attach, NULL
};

struct cfdriver vio_cd = {
	NULL, "vio", DV_IFNET
};

int
vio_alloc_dmamem(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int nsegs;

	if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
	    sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
	    &sc->sc_dma_map) != 0)
		goto err;
	if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
	    &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
		goto destroy;
	if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
	    sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
		goto free;
	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
	    sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
		goto unmap;
	return (0);

unmap:
	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
free:
	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
destroy:
	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
err:
	return (1);
}

void
vio_free_dmamem(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
}

/* allocate memory */
/*
 * dma memory is used for:
 *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
 *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
 *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * Only one instance of each sc_ctrl_* structure is allocated; they are
 * protected by sc_ctrl_inuse, which must only be accessed at splnet.
 *
 * metadata headers for received frames are stored at the start of the
 * rx mbufs.
 */
/*
 * dynamically allocated memory is used for:
 *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
 *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
 *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
 *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
 */
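/*
 * Resulting layout of the dma region (sketched for clarity; the ctrl
 * part only exists if the control vq was allocated):
 *
 *	sc_dma_kva -> +--------------------------------+
 *	              | sc_tx_hdrs[0..txqsize-1]       |
 *	              +--------------------------------+
 *	              | sc_ctrl_cmd                    |
 *	              +--------------------------------+
 *	              | sc_ctrl_status                 |
 *	              +--------------------------------+
 *	              | sc_ctrl_rx                     |
 *	              +--------------------------------+
 *	              | sc_ctrl_mac_tbl_uc (1 entry)   |
 *	              +--------------------------------+
 *	              | sc_ctrl_mac_tbl_mc (<= 64)     |
 *	              +--------------------------------+
 */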
int
vio_alloc_mem(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	int allocsize, r, i, txsize;
	unsigned int offset = 0;
	int rxqsize, txqsize;
	caddr_t kva;

	rxqsize = vsc->sc_vqs[0].vq_num;
	txqsize = vsc->sc_vqs[1].vq_num;

	/*
	 * For simplicity, we always allocate the full virtio_net_hdr size
	 * even if VIRTIO_NET_F_MRG_RXBUF is not negotiated and
	 * only a part of the memory is ever used.
	 */
	allocsize = sizeof(struct virtio_net_hdr) * txqsize;

	if (vsc->sc_nvqs == 3) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += VIO_CTRL_MAC_INFO_SIZE;
	}
	sc->sc_dma_size = allocsize;

	if (vio_alloc_dmamem(sc) != 0) {
		printf("unable to allocate dma region\n");
		return -1;
	}

	kva = sc->sc_dma_kva;
	sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
	offset += sizeof(struct virtio_net_hdr) * txqsize;
	if (vsc->sc_nvqs == 3) {
		sc->sc_ctrl_cmd = (void*)(kva + offset);
		offset += sizeof(*sc->sc_ctrl_cmd);
		sc->sc_ctrl_status = (void*)(kva + offset);
		offset += sizeof(*sc->sc_ctrl_status);
		sc->sc_ctrl_rx = (void*)(kva + offset);
		offset += sizeof(*sc->sc_ctrl_rx);
		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
		    ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
		sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
	}

	sc->sc_arrays = mallocarray(rxqsize + txqsize,
	    2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (sc->sc_arrays == NULL) {
		printf("unable to allocate mem for dmamaps\n");
		goto err_hdr;
	}
	allocsize = (rxqsize + txqsize) *
	    (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));

	sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;

	for (i = 0; i < rxqsize; i++) {
		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
		if (r != 0)
			goto err_reqs;
	}

	txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
	for (i = 0; i < txqsize; i++) {
		r = bus_dmamap_create(vsc->sc_dmat, txsize,
		    VIRTIO_NET_TX_MAXNSEGS, txsize, 0,
		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
		    &sc->sc_tx_dmamaps[i]);
		if (r != 0)
			goto err_reqs;
	}

	return 0;

err_reqs:
	printf("dmamap creation failed, error %d\n", r);
	for (i = 0; i < txqsize; i++) {
		if (sc->sc_tx_dmamaps[i])
			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		if (sc->sc_rx_dmamaps[i])
			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
	}
	if (sc->sc_arrays) {
		free(sc->sc_arrays, M_DEVBUF, allocsize);
		sc->sc_arrays = NULL;
	}
err_hdr:
	vio_free_dmamem(sc);
	return -1;
}

void
vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
{
	int i;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
		    VIRTIO_NET_CONFIG_MAC + i);
	}
}

void
vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
{
	int i;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
		     ac->ac_enaddr[i]);
	}
}

void
vio_attach(struct device *parent, struct device *self, void *aux)
{
	struct vio_softc *sc = (struct vio_softc *)self;
	struct virtio_softc *vsc = (struct virtio_softc *)parent;
	uint64_t features;
	int i;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (vsc->sc_child != NULL) {
		printf(": child already attached for %s; something wrong...\n",
		       parent->dv_xname);
		return;
	}

	sc->sc_virtio = vsc;

	vsc->sc_child = self;
	vsc->sc_ipl = IPL_NET;
	vsc->sc_vqs = &sc->sc_vq[0];
	vsc->sc_config_change = 0;

	features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
	    VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX |
	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM;
	/*
	 * VIRTIO_F_RING_EVENT_IDX can be switched off by setting bit 2 in the
	 * driver flags, see config(8)
	 */
	if (!(sc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) &&
	    !(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))
		features |= VIRTIO_F_RING_EVENT_IDX;
	else
		printf(": RingEventIdx disabled by UKC");

	features = virtio_negotiate_features(vsc, features,
	    virtio_net_feature_names);
	if (features & VIRTIO_NET_F_MAC) {
		vio_get_lladr(&sc->sc_ac, vsc);
	} else {
		ether_fakeaddr(ifp);
		vio_put_lladr(&sc->sc_ac, vsc);
	}
	printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));

	if (features & VIRTIO_NET_F_MRG_RXBUF) {
		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
		ifp->if_hardmtu = 16000; /* arbitrary limit */
	} else {
		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
		ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
	}

	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
		goto err;
	vsc->sc_nvqs = 1;
	sc->sc_vq[VQRX].vq_done = vio_rx_intr;
	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
	    sc->sc_hdr_size + ifp->if_hardmtu + ETHER_HDR_LEN,
	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
		goto err;
	}
	vsc->sc_nvqs = 2;
	sc->sc_vq[VQTX].vq_done = vio_tx_intr;
	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
	if (features & VIRTIO_F_RING_EVENT_IDX)
		virtio_postpone_intr_far(&sc->sc_vq[VQTX]);
	else
		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
	if ((features & VIRTIO_NET_F_CTRL_VQ)
	    && (features & VIRTIO_NET_F_CTRL_RX)) {
		if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1,
		    "control") == 0) {
			sc->sc_vq[VQCTL].vq_done = vio_ctrleof;
			virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
			vsc->sc_nvqs = 3;
		}
	}

	if (vio_alloc_mem(sc) < 0)
		goto err;

	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = vio_start;
	ifp->if_ioctl = vio_ioctl;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
	if (features & VIRTIO_NET_F_CSUM)
		ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4;
	IFQ_SET_MAXLEN(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
	vsc->sc_config_change = vio_config_change;
	timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]);
	timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]);

	if_attach(ifp);
	ether_ifattach(ifp);

	return;

err:
	for (i = 0; i < vsc->sc_nvqs; i++)
		virtio_free_vq(vsc, &sc->sc_vq[i]);
	vsc->sc_nvqs = 0;
	vsc->sc_child = VIRTIO_CHILD_ERROR;
	return;
}

/* check link status */
void
vio_link_state(struct ifnet *ifp)
{
	struct vio_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	int link_state = LINK_STATE_FULL_DUPLEX;

	if (vsc->sc_features & VIRTIO_NET_F_STATUS) {
		int status = virtio_read_device_config_2(vsc,
		    VIRTIO_NET_CONFIG_STATUS);
		if (!(status & VIRTIO_NET_S_LINK_UP))
			link_state = LINK_STATE_DOWN;
	}
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}

int
vio_config_change(struct virtio_softc *vsc)
{
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	vio_link_state(&sc->sc_ac.ac_if);
	return 1;
}

int
vio_media_change(struct ifnet *ifp)
{
	/* Ignore */
	return (0);
}

void
vio_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;

	vio_link_state(ifp);
	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE|IFM_FDX;
}

/*
 * Interface functions for ifnet
 */
int
vio_init(struct ifnet *ifp)
{
	struct vio_softc *sc = ifp->if_softc;

	vio_stop(ifp, 0);
	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
	    sc->sc_vq[VQRX].vq_num);
	vio_populate_rx_mbufs(sc);
	ifp->if_flags |= IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);
	vio_iff(sc);
	vio_link_state(ifp);
	return 0;
}

void
vio_stop(struct ifnet *ifp, int disable)
{
	struct vio_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;

	timeout_del(&sc->sc_txtick);
	timeout_del(&sc->sc_rxtick);
	ifp->if_flags &= ~IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);
	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	vio_rxeof(sc);
	if (vsc->sc_nvqs >= 3)
		vio_ctrleof(&sc->sc_vq[VQCTL]);
	vio_tx_drain(sc);
	if (disable)
		vio_rx_drain(sc);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, vsc->sc_features, NULL);
	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
	if (vsc->sc_nvqs >= 3)
		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
	virtio_reinit_end(vsc);
	if (vsc->sc_nvqs >= 3) {
		if (sc->sc_ctrl_inuse != FREE)
			sc->sc_ctrl_inuse = RESET;
		wakeup(&sc->sc_ctrl_inuse);
	}
}

void
vio_start(struct ifnet *ifp)
{
	struct vio_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQTX];
	struct mbuf *m;
	int queued = 0;

	vio_txeof(vq);

	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
		return;
	if (IFQ_IS_EMPTY(&ifp->if_snd))
		return;

again:
	for (;;) {
		int slot, r;
		struct virtio_net_hdr *hdr;

		m = ifq_deq_begin(&ifp->if_snd);
		if (m == NULL)
			break;

		r = virtio_enqueue_prep(vq, &slot);
		if (r == EAGAIN) {
			ifq_deq_rollback(&ifp->if_snd, m);
			ifq_set_oactive(&ifp->if_snd);
			break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer: %d", r);

		hdr = &sc->sc_tx_hdrs[slot];
		memset(hdr, 0, sc->sc_hdr_size);
		if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
			struct mbuf *mip;
			struct ip *ip;
			int ehdrlen = ETHER_HDR_LEN;
			int ipoff;
#if NVLAN > 0
			struct ether_vlan_header *eh;

			eh = mtod(m, struct ether_vlan_header *);
			if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
				ehdrlen += ETHER_VLAN_ENCAP_LEN;
#endif

			if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
				hdr->csum_offset = offsetof(struct tcphdr, th_sum);
			else
				hdr->csum_offset = offsetof(struct udphdr, uh_sum);

			mip = m_getptr(m, ehdrlen, &ipoff);
			KASSERT(mip != NULL && mip->m_len - ipoff >= sizeof(*ip));
			ip = (struct ip *)(mip->m_data + ipoff);
			hdr->csum_start = ehdrlen + (ip->ip_hl << 2);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		}

		r = vio_encap(sc, slot, m);
		if (r != 0) {
			virtio_enqueue_abort(vq, slot);
			ifq_deq_commit(&ifp->if_snd, m);
			m_freem(m);
			ifp->if_oerrors++;
			continue;
		}
		r = virtio_enqueue_reserve(vq, slot,
		    sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			bus_dmamap_unload(vsc->sc_dmat,
			    sc->sc_tx_dmamaps[slot]);
			ifq_deq_rollback(&ifp->if_snd, m);
			sc->sc_tx_mbufs[slot] = NULL;
			ifq_set_oactive(&ifp->if_snd);
			break;
		}
		ifq_deq_commit(&ifp->if_snd, m);

		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
		    sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
		    BUS_DMASYNC_PREWRITE);
		VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sc->sc_hdr_size, 1);
		virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
		virtio_enqueue_commit(vsc, vq, slot, 0);
		queued++;
#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
	}
	if (ifq_is_oactive(&ifp->if_snd)) {
		int r;
		if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX)
			r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
		else
			r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
		if (r) {
			vio_txeof(vq);
			goto again;
		}
	}

	if (queued > 0) {
		virtio_notify(vsc, vq);
		timeout_add_sec(&sc->sc_txtick, 1);
	}
}

#if VIRTIO_DEBUG
void
vio_dump(struct vio_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct virtio_softc *vsc = sc->sc_virtio;

	printf("%s status dump:\n", ifp->if_xname);
	printf("TX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQTX]);
	printf("tx tick active: %d\n", !timeout_triggered(&sc->sc_txtick));
	printf("rx tick active: %d\n", !timeout_triggered(&sc->sc_rxtick));
	printf("RX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQRX]);
	if (vsc->sc_nvqs == 3) {
		printf("CTL virtqueue:\n");
		virtio_vq_dump(&vsc->sc_vqs[VQCTL]);
		printf("ctrl_inuse: %d\n", sc->sc_ctrl_inuse);
	}
}
#endif

int
vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vio_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int s, r = 0;

	s = splnet();
	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		if (!(ifp->if_flags & IFF_RUNNING))
			vio_init(ifp);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
#if VIRTIO_DEBUG
			if (ifp->if_flags & IFF_DEBUG)
				vio_dump(sc);
#endif
			if (ifp->if_flags & IFF_RUNNING)
				r = ENETRESET;
			else
				vio_init(ifp);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vio_stop(ifp, 1);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFRXR:
		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
		    NULL, MCLBYTES, &sc->sc_rx_ring);
		break;
	default:
		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
	}

	if (r == ENETRESET) {
		if (ifp->if_flags & IFF_RUNNING)
			vio_iff(sc);
		r = 0;
	}
	splx(s);
	return r;
}

/*
 * Receive implementation
 */
/* allocate and initialize a mbuf for receive */
int
vio_add_rx_mbuf(struct vio_softc *sc, int i)
{
	struct mbuf *m;
	int r;

	m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
	if (m == NULL)
		return ENOBUFS;
	sc->sc_rx_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
	    m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		sc->sc_rx_mbufs[i] = NULL;
		return r;
	}

	return 0;
}

/* free a mbuf for receive */
void
vio_free_rx_mbuf(struct vio_softc *sc, int i)
{
	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
	m_freem(sc->sc_rx_mbufs[i]);
	sc->sc_rx_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
void
vio_populate_rx_mbufs(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int r, done = 0;
	u_int slots;
	struct virtqueue *vq = &sc->sc_vq[VQRX];
	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);

	for (slots = if_rxr_get(&sc->sc_rx_ring, vq->vq_num);
	    slots > 0; slots--) {
		int slot;
		r = virtio_enqueue_prep(vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers: %d", r);
		if (sc->sc_rx_mbufs[slot] == NULL) {
			r = vio_add_rx_mbuf(sc, slot);
			if (r != 0) {
				virtio_enqueue_abort(vq, slot);
				break;
			}
		}
		r = virtio_enqueue_reserve(vq, slot,
		    sc->sc_rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
		if (r != 0) {
			vio_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
		    MCLBYTES, BUS_DMASYNC_PREREAD);
		if (mrg_rxbuf) {
			virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
		} else {
			/*
			 * Buggy kvm wants a buffer of exactly the size of
			 * the header in this case, so we have to split in
			 * two.
			 */
			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
			    0, sc->sc_hdr_size, 0);
			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
			    sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0);
		}
		virtio_enqueue_commit(vsc, vq, slot, 0);
		done = 1;
	}
	if_rxr_put(&sc->sc_rx_ring, slots);

	if (done)
		virtio_notify(vsc, vq);
	if (vq->vq_used_idx != vq->vq_avail_idx)
		timeout_del(&sc->sc_rxtick);
	else
		timeout_add_sec(&sc->sc_rxtick, 1);
}

/* dequeue received packets */
int
vio_rxeof(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQRX];
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m, *m0 = NULL, *mlast;
	int r = 0;
	int slot, len, bufs_left;
	struct virtio_net_hdr *hdr;

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r = 1;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
		    MCLBYTES, BUS_DMASYNC_POSTREAD);
		m = sc->sc_rx_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
		sc->sc_rx_mbufs[slot] = NULL;
		virtio_dequeue_commit(vq, slot);
		if_rxr_put(&sc->sc_rx_ring, 1);
		m->m_len = m->m_pkthdr.len = len;
		m->m_pkthdr.csum_flags = 0;
		if (m0 == NULL) {
			hdr = mtod(m, struct virtio_net_hdr *);
			m_adj(m, sc->sc_hdr_size);
			m0 = mlast = m;
			if (VIO_HAVE_MRG_RXBUF(sc))
				bufs_left = hdr->num_buffers - 1;
			else
				bufs_left = 0;
		} else {
			m->m_flags &= ~M_PKTHDR;
			m0->m_pkthdr.len += m->m_len;
			mlast->m_next = m;
			mlast = m;
			bufs_left--;
		}

		if (bufs_left == 0) {
			ml_enqueue(&ml, m0);
			m0 = NULL;
		}
	}
	if (m0 != NULL) {
		DPRINTF("%s: expected %d buffers, got %d\n", __func__,
		    (int)hdr->num_buffers,
		    (int)hdr->num_buffers - bufs_left);
		ifp->if_ierrors++;
		m_freem(m0);
	}

	if_input(ifp, &ml);
	return r;
}

int
vio_rx_intr(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	int r, sum = 0;

again:
	r = vio_rxeof(sc);
	sum += r;
	if (r) {
		vio_populate_rx_mbufs(sc);
		/* set used event index to the next slot */
		if (vsc->sc_features & VIRTIO_F_RING_EVENT_IDX) {
			if (virtio_start_vq_intr(vq->vq_owner, vq))
				goto again;
		}
	}

	return sum;
}

void
vio_rxtick(void *arg)
{
	struct virtqueue *vq = arg;
	struct virtio_softc *vsc = vq->vq_owner;
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	int s;

	s = splnet();
	vio_populate_rx_mbufs(sc);
	splx(s);
}

/* free all the mbufs; called from if_stop(disable) */
void
vio_rx_drain(struct vio_softc *sc)
{
	struct virtqueue *vq = &sc->sc_vq[VQRX];
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_rx_mbufs[i] == NULL)
			continue;
		vio_free_rx_mbuf(sc, i);
	}
}

/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * tx interrupt is actually disabled unless the tx queue is full, i.e.
 * IFF_OACTIVE is set. vio_txtick is used to make sure that mbufs
 * are dequeued and freed even if no further transfer happens.
 */
int
vio_tx_intr(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	int r;

	r = vio_txeof(vq);
	vio_start(ifp);
	return r;
}

void
vio_txtick(void *arg)
{
	struct virtqueue *vq = arg;
	int s = splnet();
	vio_tx_intr(vq);
	splx(s);
}

int
vio_txeof(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
		r++;
		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
		    sc->sc_tx_dmamaps[slot]->dm_mapsize,
		    BUS_DMASYNC_POSTWRITE);
		m = sc->sc_tx_mbufs[slot];
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
		sc->sc_tx_mbufs[slot] = NULL;
		virtio_dequeue_commit(vq, slot);
		m_freem(m);
	}

	if (r) {
		ifq_clr_oactive(&ifp->if_snd);
		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
	}
	if (vq->vq_used_idx == vq->vq_avail_idx)
		timeout_del(&sc->sc_txtick);
	else if (r)
		timeout_add_sec(&sc->sc_txtick, 1);
	return r;
}

int
vio_encap(struct vio_softc *sc, int slot, struct mbuf *m)
{
	struct virtio_softc	*vsc = sc->sc_virtio;
	bus_dmamap_t		 dmap = sc->sc_tx_dmamaps[slot];
	int			 r;

	r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
	    BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	switch (r) {
	case 0:
		break;
	case EFBIG:
		if (m_defrag(m, M_DONTWAIT) == 0 &&
		    bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
		    BUS_DMA_WRITE|BUS_DMA_NOWAIT) == 0)
			break;

		/* FALLTHROUGH */
	default:
		return ENOBUFS;
	}
	sc->sc_tx_mbufs[slot] = m;
	return 0;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
void
vio_tx_drain(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQTX];
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_tx_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
		m_freem(sc->sc_tx_mbufs[i]);
		sc->sc_tx_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
int
vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQCTL];
	int r, slot;

	splassert(IPL_NET);

	if ((r = vio_wait_ctrl(sc)) != 0)
		return r;

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
	sc->sc_ctrl_cmd->command = cmd;
	sc->sc_ctrl_rx->onoff = onoff;

	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
	r = virtio_enqueue_reserve(vq, slot, 3);
	if (r != 0)
		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), 1);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
	    sizeof(*sc->sc_ctrl_rx), 1);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), 0);
	virtio_enqueue_commit(vsc, vq, slot, 1);

	if ((r = vio_wait_ctrl_done(sc)) != 0)
		goto out;

	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
		r = 0;
	} else {
		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
		r = EIO;
	}

	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
out:
	vio_ctrl_wakeup(sc, FREE);
	return r;
}

/*
 * XXXSMP As long as some per-ifp ioctl(2)s are executed while holding
 * the NET_LOCK(), deadlocks are possible.  So release it here.
 */
static inline int
vio_sleep(struct vio_softc *sc, const char *wmesg)
{
	int status = rw_status(&netlock);

	if (status != RW_WRITE && status != RW_READ)
		return tsleep(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg, 0);

	return rwsleep(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg, 0);
}

int
vio_wait_ctrl(struct vio_softc *sc)
{
	int r = 0;

	while (sc->sc_ctrl_inuse != FREE) {
		r = vio_sleep(sc, "viowait");
		if (r == EINTR)
			return r;
	}
	sc->sc_ctrl_inuse = INUSE;

	return r;
}

int
vio_wait_ctrl_done(struct vio_softc *sc)
{
	int r = 0;

	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
		r = vio_sleep(sc, "viodone");
		if (r == EINTR)
			break;
	}
	/* a reset aborts the pending command; report it to the caller */
	if (sc->sc_ctrl_inuse == RESET)
		r = 1;
	return r;
}

void
vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
{
	sc->sc_ctrl_inuse = new;
	wakeup(&sc->sc_ctrl_inuse);
}

int
vio_ctrleof(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
	int r = 0, ret, slot;

again:
	ret = virtio_dequeue(vsc, vq, &slot, NULL);
	if (ret == ENOENT)
		return r;
	virtio_dequeue_commit(vq, slot);
	r++;
	vio_ctrl_wakeup(sc, DONE);
	if (virtio_start_vq_intr(vsc, vq))
		goto again;

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
int
vio_set_rx_filter(struct vio_softc *sc)
{
	/* filter already set in sc_ctrl_mac_tbl */
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQCTL];
	int r, slot;

	splassert(IPL_NET);

	if ((r = vio_wait_ctrl(sc)) != 0)
		return r;

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;

	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
	r = virtio_enqueue_reserve(vq, slot, 4);
	if (r != 0)
		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), 1);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
	    sizeof(*sc->sc_ctrl_mac_tbl_uc) +
	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
	    sizeof(*sc->sc_ctrl_mac_tbl_mc) +
	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), 0);
	virtio_enqueue_commit(vsc, vq, slot, 1);

	if ((r = vio_wait_ctrl_done(sc)) != 0)
		goto out;

	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
		r = 0;
	} else {
		/* The host's filter table is not large enough */
		printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
		r = EIO;
	}

out:
	vio_ctrl_wakeup(sc, FREE);
	return r;
}

void
vio_iff(struct vio_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct arpcom *ac = &sc->sc_ac;
	struct ether_multi *enm;
	struct ether_multistep step;
	int nentries = 0;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	splassert(IPL_NET);

	ifp->if_flags &= ~IFF_ALLMULTI;

	if (vsc->sc_nvqs < 3) {
		/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_ALLMULTI | IFF_PROMISC;
		return;
	}

	if (sc->sc_dev.dv_cfdata->cf_flags & CONFFLAG_QEMU_VLAN_BUG)
		ifp->if_flags |= IFF_PROMISC;

	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
	    ac->ac_multicnt >= VIRTIO_NET_CTRL_MAC_MC_ENTRIES) {
		ifp->if_flags |= IFF_ALLMULTI;
		if (ifp->if_flags & IFF_PROMISC)
			promisc = 1;
		else
			allmulti = 1;
	} else {
		rxfilter = 1;

		ETHER_FIRST_MULTI(step, ac, enm);
		while (enm != NULL) {
			memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries++],
			    enm->enm_addrlo, ETHER_ADDR_LEN);

			ETHER_NEXT_MULTI(step, enm);
		}
	}

	/* set unicast address, VirtualBox wants that */
	memcpy(sc->sc_ctrl_mac_tbl_uc->macs[0], ac->ac_enaddr, ETHER_ADDR_LEN);
	sc->sc_ctrl_mac_tbl_uc->nentries = 1;

	sc->sc_ctrl_mac_tbl_mc->nentries = rxfilter ? nentries : 0;

	if (vsc->sc_nvqs < 3)
		return;

	r = vio_set_rx_filter(sc);
	if (r == EIO)
		allmulti = 1; /* fallback */
	else if (r != 0)
		return;

	r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, allmulti);
	if (r == EIO)
		promisc = 1; /* fallback */
	else if (r != 0)
		return;

	vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
}
1489