xref: /dflybsd-src/sys/dev/virtual/vkernel/net/if_vke.c (revision a7a9566230a8b1ccb77f407588ebc35b9a831a1e)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.10 2008/05/27 23:44:46 dillon Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/proc.h>
42 #include <sys/serialize.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
46 
47 #include <machine/md_var.h>
48 #include <machine/cothread.h>
49 
50 #include <net/ethernet.h>
51 #include <net/if.h>
52 #include <net/bpf.h>
53 #include <net/if_arp.h>
54 #include <net/ifq_var.h>
55 
56 #include <netinet/in_var.h>
57 
58 #include <sys/stat.h>
59 #include <net/tap/if_tap.h>
60 #include <err.h>
61 #include <errno.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <fcntl.h>
66 
67 #define VKE_DEVNAME		"vke"
68 
69 #define VKE_CHUNK	8 /* number of mbufs to queue before interrupting */
70 
71 #define NETFIFOSIZE	256
72 #define NETFIFOMASK	(NETFIFOSIZE - 1)
73 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
74 
75 #define VKE_COTD_RUN	0
76 #define VKE_COTD_EXIT	1
77 #define VKE_COTD_DEAD	2
78 
79 struct vke_fifo {
80 	struct mbuf	*array[NETFIFOSIZE];
81 	int		rindex;
82 	int		windex;
83 };
84 typedef struct vke_fifo *fifo_t;
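
/*
 * A minimal sketch of the ring arithmetic these fifos rely on:
 * rindex and windex are free-running counters and NETFIFOINDEX()
 * masks them into the array, so NETFIFOSIZE must be a power of two.
 * One slot is sacrificed so that "empty" (both indices mask to the
 * same slot) and "full" (windex + 1 masks to rindex) remain
 * distinguishable.  The helper below is hypothetical, for
 * illustration only.
 */
#if 0	/* illustrative sketch, not compiled */
static int
vke_fifo_count(const struct vke_fifo *fifo)
{
	/* e.g. windex = 258, rindex = 255 -> 3 mbufs in flight */
	return ((fifo->windex - fifo->rindex) & NETFIFOMASK);
}
#endif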
85 
86 struct vke_softc {
87 	struct arpcom		arpcom;
88 	int			sc_fd;
89 	int			sc_unit;
90 
91 	cothread_t		cotd_tx;
92 	cothread_t		cotd_rx;
93 
94 	int			cotd_tx_exit;
95 	int			cotd_rx_exit;
96 
97 	void			*sc_txbuf;
98 	int			sc_txbuf_len;
99 
100 	fifo_t			sc_txfifo;
101 	fifo_t			sc_txfifo_done;
102 	fifo_t			sc_rxfifo;
103 
104 	struct sysctl_ctx_list	sc_sysctl_ctx;
105 	struct sysctl_oid	*sc_sysctl_tree;
106 
107 	int			sc_tap_unit;	/* unit of backend tap(4) */
108 	in_addr_t		sc_addr;	/* address */
109 	in_addr_t		sc_mask;	/* netmask */
110 };
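
/*
 * Data flow, for orientation: the kernel enqueues outbound mbufs on
 * sc_txfifo; vke_tx_thread() writes them to the tap file descriptor
 * and parks the spent mbufs on sc_txfifo_done for the kernel to free.
 * Inbound, vke_rx_thread() reads packets into the empty mbufs
 * pre-loaded on sc_rxfifo and the kernel swaps fresh clusters into
 * the slots as it consumes them.
 */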
111 
112 static void	vke_start(struct ifnet *);
113 static void	vke_init(void *);
114 static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
115 
116 static int	vke_attach(const struct vknetif_info *, int);
117 static int	vke_stop(struct vke_softc *);
118 static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
119 static void	vke_tx_intr(cothread_t cotd);
120 static void	vke_tx_thread(cothread_t cotd);
121 static void	vke_rx_intr(cothread_t cotd);
122 static void	vke_rx_thread(cothread_t cotd);
123 
124 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
125 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
126 
127 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
128 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
129 
130 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
131 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
132 
133 static void
134 vke_sysinit(void *arg __unused)
135 {
136 	int i, unit;
137 
138 	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d\n", NetifNum));
139 
140 	unit = 0;
141 	for (i = 0; i < NetifNum; ++i) {
142 		if (vke_attach(&NetifInfo[i], unit) == 0)
143 			++unit;
144 	}
145 }
146 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
147 
148 /*
149  * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.
150  * Since the cothread cannot free transmit mbufs after processing, we
151  * put them on the done fifo so the kernel can free them instead.
152  */
153 static int
154 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
155 {
156 	fifo_t fifo = sc->sc_txfifo_done;
157 
158 	while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) {
159 		usleep(20000);
160 	}
161 
162 	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
163 	cpu_sfence();
164 	++fifo->windex;
165 	return (0);
166 }
167 
168 /*
169  * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
170  */
171 static struct mbuf *
172 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
173 {
174 	fifo_t fifo = sc->sc_txfifo_done;
175 	struct mbuf *m;
176 
177 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
178 		return (NULL);
179 
180 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
181 	fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
182 	cpu_lfence();
183 	++fifo->rindex;
184 	return (m);
185 }
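
/*
 * A sketch of the lockless publish/consume protocol all three fifos
 * follow (one producer, one consumer): the producer stores the array
 * slot, issues cpu_sfence(), then advances windex; the consumer
 * copies the slot out, issues cpu_lfence(), then advances rindex,
 * which is what hands the slot back to the producer.
 */
#if 0	/* illustrative sketch, not compiled */
	/* producer side */
	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
	cpu_sfence();		/* publish the slot before the index */
	++fifo->windex;

	/* consumer side */
	if (NETFIFOINDEX(fifo->rindex) != NETFIFOINDEX(fifo->windex)) {
		m = fifo->array[NETFIFOINDEX(fifo->rindex)];
		cpu_lfence();	/* consume the slot before releasing it */
		++fifo->rindex;
	}
#endif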
186 
187 /*
188  * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
189  */
190 static int
191 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
192 {
193 	fifo_t fifo = sc->sc_txfifo;
194 
195 	if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
196 		return (-1);
197 
198 	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
199 	cpu_sfence();
200 	++fifo->windex;
201 
202 	return (0);
203 }
204 
205 /*
206  * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
207  * exists.
208  */
209 static struct mbuf *
210 vke_txfifo_dequeue(struct vke_softc *sc)
211 {
212 	fifo_t fifo = sc->sc_txfifo;
213 	struct mbuf *m;
214 
215 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
216 		return (NULL);
217 
218 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
219 	fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
220 
221 	cpu_lfence();
222 	++fifo->rindex;
223 	return (m);
224 }
225 
226 static int
227 vke_txfifo_empty(struct vke_softc *sc)
228 {
229 	fifo_t fifo = sc->sc_txfifo;
230 
231 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
232 		return (1);
233 	return (0);
234 }
235 
236 /*
237  * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo
238  * if one exists, replacing it with newm, which should point to a
239  * newly allocated mbuf.
240  */
241 static struct mbuf *
242 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
243 {
244 	fifo_t fifo = sc->sc_rxfifo;
245 	struct mbuf *m;
246 
247 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
248 		return (NULL);
249 
250 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
251 	fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
252 	cpu_lfence();
253 	++fifo->rindex;
254 	return (m);
255 }
256 
257 /*
258  * Return the next mbuf if available but do NOT remove it from the FIFO.
259  */
260 static struct mbuf *
261 vke_rxfifo_sniff(struct vke_softc *sc)
262 {
263 	fifo_t fifo = sc->sc_rxfifo;
264 	struct mbuf *m;
265 
266 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
267 		return (NULL);
268 
269 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
270 	cpu_lfence();
271 	return (m);
272 }
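
/*
 * The sniff/dequeue split above lets the kernel peek at a received
 * mbuf, attempt to allocate a replacement cluster, and only commit
 * the dequeue once it knows what to put back into the slot.  A
 * minimal sketch of the consumer loop (this is how vke_rx_intr()
 * below uses it):
 */
#if 0	/* illustrative sketch, not compiled */
	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
		if (nm != NULL) {
			vke_rxfifo_dequeue(sc, nm);	/* consume, refill */
			ifp->if_input(ifp, m);
		} else {
			vke_rxfifo_dequeue(sc, m);	/* recycle in place */
		}
	}
#endif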
273 
274 static void
275 vke_init(void *xsc)
276 {
277 	struct vke_softc *sc = xsc;
278 	struct ifnet *ifp = &sc->arpcom.ac_if;
279 	int i;
280 
281 	ASSERT_SERIALIZED(ifp->if_serializer);
282 
283 	vke_stop(sc);
284 
285 	ifp->if_flags |= IFF_RUNNING;
286 	ifp->if_flags &= ~IFF_OACTIVE;
287 
288 	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK | M_ZERO);
289 	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK | M_ZERO);
290 
291 	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK | M_ZERO);
292 	for (i = 0; i < NETFIFOSIZE; i++) {
293 		sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
294 		sc->sc_txfifo->array[i] = NULL;
295 		sc->sc_txfifo_done->array[i] = NULL;
296 	}
297 
298 	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
299 	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
300 	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
301 
302 	if (sc->sc_addr != 0) {
303 		in_addr_t addr, mask;
304 
305 		addr = sc->sc_addr;
306 		mask = sc->sc_mask;
307 
308 		/*
309 		 * Make sure the vkernel-assigned address
310 		 * will not be added a second time if the
311 		 * interface is reinitialized.
312 		 */
313 		sc->sc_addr = 0;
314 		sc->sc_mask = 0;
315 
316 		vke_init_addr(ifp, addr, mask);
317 	}
318 
319 }
320 
321 /*
322  * Called from kernel.
323  *
324  * NOTE: We can't make any kernel callbacks while holding cothread lock
325  *	 because the cothread lock is not governed by the kernel scheduler
326  *	 (so mplock, tokens, etc will not be released).
327  */
328 static void
329 vke_start(struct ifnet *ifp)
330 {
331 	struct vke_softc *sc = ifp->if_softc;
332 	struct mbuf *m;
333 	cothread_t cotd = sc->cotd_tx;
334 	int count;
335 
336 	ASSERT_SERIALIZED(ifp->if_serializer);
337 
338 	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
339 		return;
340 
341 	count = 0;
342 	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
343 		if (vke_txfifo_enqueue(sc, m) != -1) {
344 			if (count++ == VKE_CHUNK) {
345 				cothread_lock(cotd, 0);
346 				cothread_signal(cotd);
347 				cothread_unlock(cotd, 0);
348 				count = 0;
349 			}
350 		} else {
351 			m_freem(m);
352 		}
353 	}
354 	if (count) {
355 		cothread_lock(cotd, 0);
356 		cothread_signal(cotd);
357 		cothread_unlock(cotd, 0);
358 	}
359 }
360 
361 static int
362 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
363 {
364 	struct vke_softc *sc = ifp->if_softc;
365 	int error = 0;
366 
367 	ASSERT_SERIALIZED(ifp->if_serializer);
368 
369 	switch (cmd) {
370 	case SIOCSIFFLAGS:
371 		if (ifp->if_flags & IFF_UP) {
372 			if ((ifp->if_flags & IFF_RUNNING) == 0)
373 				vke_init(sc);
374 		} else {
375 			if (ifp->if_flags & IFF_RUNNING)
376 				vke_stop(sc);
377 		}
378 		break;
379 	case SIOCGIFMEDIA:
380 	case SIOCSIFMEDIA:
381 		error = EOPNOTSUPP;
382 		/* TODO */
383 		break;
384 	case SIOCGIFSTATUS: {
385 		struct ifstat *ifs = (struct ifstat *)data;
386 		int len;
387 
388 		len = strlen(ifs->ascii);
389 		if (len < sizeof(ifs->ascii)) {
390 			ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
391 				  "\tBacked by tap%d\n", sc->sc_tap_unit);
392 		}
393 		break;
394 	}
395 	case SIOCSIFADDR:
396 		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
397 			/*
398 			 * If we are explicitly requested to change address,
399 			 * we should invalidate address/netmask passed in
400 			 * from vkernel command line.
401 			 */
402 			sc->sc_addr = 0;
403 			sc->sc_mask = 0;
404 		}
405 		/* FALL THROUGH */
406 	default:
407 		error = ether_ioctl(ifp, cmd, data);
408 		break;
409 	}
410 	return error;
411 }
412 
413 static int
414 vke_stop(struct vke_softc *sc)
415 {
416 	struct ifnet *ifp = &sc->arpcom.ac_if;
417 	int i;
418 
419 	ASSERT_SERIALIZED(ifp->if_serializer);
420 
421 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
422 
423 	if (sc) {
424 		if (sc->cotd_tx) {
425 			cothread_lock(sc->cotd_tx, 0);
426 			if (sc->cotd_tx_exit == VKE_COTD_RUN)
427 				sc->cotd_tx_exit = VKE_COTD_EXIT;
428 			cothread_signal(sc->cotd_tx);
429 			cothread_unlock(sc->cotd_tx, 0);
430 			cothread_delete(&sc->cotd_tx);
431 		}
432 		if (sc->cotd_rx) {
433 			cothread_lock(sc->cotd_rx, 0);
434 			if (sc->cotd_rx_exit == VKE_COTD_RUN)
435 				sc->cotd_rx_exit = VKE_COTD_EXIT;
436 			cothread_signal(sc->cotd_rx);
437 			cothread_unlock(sc->cotd_rx, 0);
438 			cothread_delete(&sc->cotd_rx);
439 		}
440 
441 		for (i = 0; i < NETFIFOSIZE; i++) {
442 			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
443 				m_freem(sc->sc_rxfifo->array[i]);
444 				sc->sc_rxfifo->array[i] = NULL;
445 			}
446 			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
447 				m_freem(sc->sc_txfifo->array[i]);
448 				sc->sc_txfifo->array[i] = NULL;
449 			}
450 			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
451 				m_freem(sc->sc_txfifo_done->array[i]);
452 				sc->sc_txfifo_done->array[i] = NULL;
453 			}
454 		}
455 
456 		if (sc->sc_txfifo) {
457 			kfree(sc->sc_txfifo, M_DEVBUF);
458 			sc->sc_txfifo = NULL;
459 		}
460 
461 		if (sc->sc_txfifo_done) {
462 			kfree(sc->sc_txfifo_done, M_DEVBUF);
463 			sc->sc_txfifo_done = NULL;
464 		}
465 
466 		if (sc->sc_rxfifo) {
467 			kfree(sc->sc_rxfifo, M_DEVBUF);
468 			sc->sc_rxfifo = NULL;
469 		}
470 	}
471 
472 
473 	return 0;
474 }
475 
476 /*
477  * vke_rx_intr() is the interrupt function for the receive cothread.
478  */
479 static void
480 vke_rx_intr(cothread_t cotd)
481 {
482 	struct mbuf *m;
483 	struct mbuf *nm;
484 	struct vke_softc *sc = cotd->arg;
485 	struct ifnet *ifp = &sc->arpcom.ac_if;
486 	int count = 0;
487 
488 	ifnet_serialize_all(ifp);
489 	cothread_lock(cotd, 0);
490 
491 	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
492 		cothread_unlock(cotd, 0);
493 		ifnet_deserialize_all(ifp);
494 		return;
495 	}
496 	cothread_unlock(cotd, 0);
497 
498 	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
499 		nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
500 		if (nm) {
501 			vke_rxfifo_dequeue(sc, nm);
502 			ifp->if_input(ifp, m);
503 			if (count++ == VKE_CHUNK) {
504 				cothread_lock(cotd, 0);
505 				cothread_signal(cotd);
506 				cothread_unlock(cotd, 0);
507 				count = 0;
508 			}
509 		} else {
510 			vke_rxfifo_dequeue(sc, m);
511 		}
512 	}
513 
514 	if (count) {
515 		cothread_lock(cotd, 0);
516 		cothread_signal(cotd);
517 		cothread_unlock(cotd, 0);
518 	}
519 	ifnet_deserialize_all(ifp);
520 }
521 
522 /*
523  * vke_tx_intr() is the interrupt function for the transmit cothread.
524  * Calls vke_start() to continue processing queued transmit mbufs.
525  */
526 static void
527 vke_tx_intr(cothread_t cotd)
528 {
529 	struct vke_softc *sc = cotd->arg;
530 	struct ifnet *ifp = &sc->arpcom.ac_if;
531 	struct mbuf *m;
532 
533 	ifnet_serialize_all(ifp);
534 	cothread_lock(cotd, 0);
535 	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
536 		cothread_unlock(cotd, 0);
537 		ifnet_deserialize_all(ifp);
538 		return;
539 	}
540 	cothread_unlock(cotd, 0);
541 
542 	/*
543 	 * Free TX mbufs that the cothread has finished processing before
544 	 * starting new ones, which keeps the pipeline flowing.
545 	 */
546 	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
547 		m_freem(m);
548 	}
549 
550 	if (ifp->if_flags & IFF_RUNNING)
551 		ifp->if_start(ifp);
552 
553 	ifnet_deserialize_all(ifp);
554 }
555 
556 /*
557  * vke_rx_thread() is the body of the receive cothread.
558  */
559 static void
560 vke_rx_thread(cothread_t cotd)
561 {
562 	struct mbuf *m;
563 	struct vke_softc *sc = cotd->arg;
564 	struct ifnet *ifp = &sc->arpcom.ac_if;
565 	fifo_t fifo = sc->sc_rxfifo;
566 	fd_set fdset;
567 	struct timeval tv;
568 	int count;
569 	int n;
570 
571 	/* The select timeout cannot be infinite since we need to poll
572 	 * the exit flag sc->cotd_rx_exit periodically.
573 	 */
574 	tv.tv_sec = 0;
575 	tv.tv_usec = 500000;
576 
577 	FD_ZERO(&fdset);
578 	count = 0;
579 
580 	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
581 		/*
582 		 * The RX FIFO is full; wait for the kernel to
583 		 * consume entries and swap empty mbufs back in.
584 		 */
585 		if (NETFIFOINDEX(fifo->windex + 1) ==
586 		    NETFIFOINDEX(fifo->rindex)) {
587 			usleep(20000);
588 			continue;
589 		}
590 
591 		/*
592 		 * Load data into the rx fifo
593 		 */
594 		m = fifo->array[NETFIFOINDEX(fifo->windex)];
595 		if (m == NULL)
596 			continue;
597 		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
598 		if (n > 0) {
599 			ifp->if_ipackets++;
600 			m->m_pkthdr.rcvif = ifp;
601 			m->m_pkthdr.len = m->m_len = n;
602 			cpu_sfence();
603 			++fifo->windex;
604 			if (count++ == VKE_CHUNK) {
605 				cothread_intr(cotd);
606 				count = 0;
607 			}
608 		} else {
609 			if (count) {
610 				cothread_intr(cotd);
611 				count = 0;
612 			}
613 			FD_SET(sc->sc_fd, &fdset);
614 
615 			if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
616 				kprintf(VKE_DEVNAME "%d: select failed for "
617 					"TAP device\n", sc->sc_unit);
618 				usleep(1000000);
619 			}
620 		}
621 	}
622 	cpu_sfence();
623 	sc->cotd_rx_exit = VKE_COTD_DEAD;
624 }
625 
626 /*
627  * vke_tx_thread() is the body of the transmit cothread.
628  */
629 static void
630 vke_tx_thread(cothread_t cotd)
631 {
632 	struct mbuf *m;
633 	struct vke_softc *sc = cotd->arg;
634 	struct ifnet *ifp = &sc->arpcom.ac_if;
635 	int count = 0;
636 
637 	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
638 		/*
639 		 * Write outgoing packets to the TAP interface
640 		 */
641 		m = vke_txfifo_dequeue(sc);
642 		if (m) {
643 			if (m->m_pkthdr.len <= MCLBYTES) {
644 				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
645 				sc->sc_txbuf_len = m->m_pkthdr.len;
646 
647 				if (write(sc->sc_fd, sc->sc_txbuf,
648 					  sc->sc_txbuf_len) < 0) {
649 					ifp->if_oerrors++;
650 				} else {
651 					ifp->if_opackets++;
652 				}
653 			}
654 			if (count++ == VKE_CHUNK) {
655 				cothread_intr(cotd);
656 				count = 0;
657 			}
658 			vke_txfifo_done_enqueue(sc, m);
659 		} else {
660 			if (count) {
661 				cothread_intr(cotd);
662 				count = 0;
663 			}
664 			cothread_lock(cotd, 1);
665 			if (vke_txfifo_empty(sc))
666 				cothread_wait(cotd);
667 			cothread_unlock(cotd, 1);
668 		}
669 	}
670 	cpu_sfence();
671 	sc->cotd_tx_exit = VKE_COTD_DEAD;
672 }
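
/*
 * Note the lost-wakeup guard in the idle path above: the cothread
 * re-checks vke_txfifo_empty() while holding the cothread lock
 * before calling cothread_wait(), so a cothread_signal() from
 * vke_start() issued between the failed dequeue and the wait cannot
 * be missed.  A minimal sketch of the pattern, assuming
 * cothread_wait() atomically releases and reacquires the lock:
 */
#if 0	/* illustrative sketch, not compiled */
	cothread_lock(cotd, 1);		/* 1: called from the cothread */
	while (vke_txfifo_empty(sc))
		cothread_wait(cotd);
	cothread_unlock(cotd, 1);
#endif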
673 
674 static int
675 vke_attach(const struct vknetif_info *info, int unit)
676 {
677 	struct vke_softc *sc;
678 	struct ifnet *ifp;
679 	struct tapinfo tapinfo;
680 	uint8_t enaddr[ETHER_ADDR_LEN];
681 	int fd;
682 
683 	KKASSERT(info->tap_fd >= 0);
684 	fd = info->tap_fd;
685 
686 	/*
687 	 * This is only a TAP device if tap_unit is non-negative.  If
688 	 * connecting to a virtual socket we generate a unique MAC instead.
689 	 */
690 	if (info->tap_unit >= 0) {
691 		if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
692 			kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
693 				"failed: %s\n", unit, strerror(errno));
694 			return ENXIO;
695 		}
696 
697 		if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
698 			kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
699 				"failed: %s\n", unit, strerror(errno));
700 			return ENXIO;
701 		}
702 	} else {
703 		int rfd = open("/dev/urandom", O_RDONLY);
704 		enaddr[0] = 0x02; enaddr[1] = 0x00; /* locally administered */
705 		if (rfd >= 0) {
706 			read(rfd, enaddr + 2, 4);
707 			close(rfd);
708 		}
709 		enaddr[4] = (int)getpid() >> 8;
710 		enaddr[5] = (int)getpid() & 255;
711 	}
712 	enaddr[1] += 1;		/* keep the MAC distinct from the backing tap's */
713 
714 	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
715 
716 	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
717 	sc->sc_fd = fd;
718 	sc->sc_unit = unit;
719 	sc->sc_tap_unit = info->tap_unit;
720 	sc->sc_addr = info->netif_addr;
721 	sc->sc_mask = info->netif_mask;
722 
723 	ifp = &sc->arpcom.ac_if;
724 	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
725 
726 	/* NB: after if_initname() */
727 	sysctl_ctx_init(&sc->sc_sysctl_ctx);
728 	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
729 					     SYSCTL_STATIC_CHILDREN(_hw),
730 					     OID_AUTO, ifp->if_xname,
731 					     CTLFLAG_RD, 0, "");
732 	if (sc->sc_sysctl_tree == NULL) {
733 		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
734 	} else {
735 		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
736 			       SYSCTL_CHILDREN(sc->sc_sysctl_tree),
737 			       OID_AUTO, "tap_unit",
738 			       CTLFLAG_RD, &sc->sc_tap_unit, 0,
739 			       "Backend tap(4) unit");
740 	}
741 
742 	ifp->if_softc = sc;
743 	ifp->if_ioctl = vke_ioctl;
744 	ifp->if_start = vke_start;
745 	ifp->if_init = vke_init;
746 	ifp->if_mtu = tapinfo.mtu;
747 	ifp->if_baudrate = tapinfo.baudrate;
748 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
749 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
750 	ifq_set_ready(&ifp->if_snd);
751 
752 	/* TODO: if_media */
753 
754 	ether_ifattach(ifp, enaddr, NULL);
755 
756 	if (bootverbose && sc->sc_addr != 0) {
757 		if_printf(ifp, "pre-configured "
758 			  "address 0x%08x, netmask 0x%08x\n",
759 			  ntohl(sc->sc_addr), ntohl(sc->sc_mask));
760 	}
761 
762 	return 0;
763 }
764 
765 static int
766 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
767 {
768 	struct ifaliasreq ifra;
769 	struct sockaddr_in *sin;
770 	int ret;
771 
772 	ASSERT_SERIALIZED(ifp->if_serializer);
773 
774 	if (bootverbose) {
775 		if_printf(ifp, "add pre-configured "
776 			  "address 0x%08x, netmask 0x%08x\n",
777 			  ntohl(addr), ntohl(mask));
778 	}
779 
780 	bzero(&ifra, sizeof(ifra));
781 
782 	/* NB: no need to set ifaliasreq.ifra_name */
783 
784 	sin = (struct sockaddr_in *)&ifra.ifra_addr;
785 	sin->sin_family = AF_INET;
786 	sin->sin_len = sizeof(*sin);
787 	sin->sin_addr.s_addr = addr;
788 
789 	if (mask != 0) {
790 		sin = (struct sockaddr_in *)&ifra.ifra_mask;
791 		sin->sin_len = sizeof(*sin);
792 		sin->sin_addr.s_addr = mask;
793 	}
794 
795 	/*
796 	 * Temporarily release serializer, in_control() will hold
797 	 * it again before calling ifnet.if_ioctl().
798 	 */
799 	ifnet_deserialize_all(ifp);
800 	ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
801 	ifnet_serialize_all(ifp);
802 
803 	return ret;
804 }
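
/*
 * For reference, the in_control(SIOCAIFADDR) call above is the
 * in-kernel equivalent of configuring the address by hand from
 * userland, e.g. (hypothetical addresses):
 *
 *	ifconfig vke0 10.0.0.1 netmask 255.255.255.0
 */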
805