xref: /dflybsd-src/sys/dev/virtual/vkernel/net/if_vke.c (revision 15b18792cb31eac9631d545d53ee5e9f60145cd3)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/proc.h>
40 #include <sys/serialize.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/sysctl.h>
44 
45 #include <machine/md_var.h>
46 #include <machine/cothread.h>
47 
48 #include <net/ethernet.h>
49 #include <net/if.h>
50 #include <net/bpf.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 
54 #include <netinet/in_var.h>
55 
56 #include <sys/stat.h>
57 #include <net/tap/if_tap.h>
58 #include <err.h>
59 #include <errno.h>
60 #include <stdio.h>
61 #include <string.h>
62 #include <unistd.h>
63 #include <fcntl.h>
64 
65 #define VKE_DEVNAME		"vke"
66 
67 #define VKE_CHUNK	8 /* number of mbufs to queue before interrupting */
68 
69 #define NETFIFOSIZE	256
70 #define NETFIFOMASK	(NETFIFOSIZE -1)
71 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
72 
73 #define VKE_COTD_RUN	0
74 #define VKE_COTD_EXIT	1
75 #define VKE_COTD_DEAD	2
76 
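/*
 * Lockless single-producer/single-consumer ring used to pass mbufs
 * between the kernel side and a cothread.  windex and rindex only ever
 * increase; NETFIFOINDEX() masks them into the array and one slot is
 * kept unused, so a full ring (the slot after windex is rindex's slot)
 * can be told apart from an empty one (windex and rindex map to the
 * same slot).  Producers issue cpu_sfence() before advancing windex and
 * consumers issue cpu_lfence() before advancing rindex, which keeps the
 * ring coherent without a lock.
 *
 * Example: with NETFIFOSIZE 256, windex 260 and rindex 255 map to
 * slots 4 and 255 respectively and the fifo holds 5 mbufs.
 */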
77 struct vke_fifo {
78 	struct mbuf	*array[NETFIFOSIZE];
79 	int		rindex;
80 	int		windex;
81 };
82 typedef struct vke_fifo *fifo_t;
83 
84 struct vke_softc {
85 	struct arpcom		arpcom;
86 	int			sc_fd;
87 	int			sc_unit;
88 
89 	cothread_t		cotd_tx;
90 	cothread_t		cotd_rx;
91 
92 	int			cotd_tx_exit;
93 	int			cotd_rx_exit;
94 
95 	void			*sc_txbuf;
96 	int			sc_txbuf_len;
97 
98 	fifo_t			sc_txfifo;
99 	fifo_t			sc_txfifo_done;
100 	fifo_t			sc_rxfifo;
101 
102 	struct sysctl_ctx_list	sc_sysctl_ctx;
103 	struct sysctl_oid	*sc_sysctl_tree;
104 
105 	int			sc_tap_unit;	/* unit of backend tap(4) */
106 	in_addr_t		sc_addr;	/* address */
107 	in_addr_t		sc_mask;	/* netmask */
108 };
109 
110 static void	vke_start(struct ifnet *, struct ifaltq_subque *);
111 static void	vke_init(void *);
112 static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
113 
114 static int	vke_attach(const struct vknetif_info *, int);
115 static int	vke_stop(struct vke_softc *);
116 static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
117 static void	vke_tx_intr(cothread_t cotd);
118 static void	vke_tx_thread(cothread_t cotd);
119 static void	vke_rx_intr(cothread_t cotd);
120 static void	vke_rx_thread(cothread_t cotd);
121 
122 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
123 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
124 
125 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
126 static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
127 
128 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
129 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
130 
131 static void
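/*
 * Attach a vke interface for each backend netif handed to the vkernel
 * (one per -I option on the vkernel command line, presumably).  Unit
 * numbers are only consumed by attaches that succeed.
 */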
132 vke_sysinit(void *arg __unused)
133 {
134 	int i, unit;
135 
136 	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));
137 
138 	unit = 0;
139 	for (i = 0; i < NetifNum; ++i) {
140 		if (vke_attach(&NetifInfo[i], unit) == 0)
141 			++unit;
142 	}
143 }
144 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
145 
146 /*
147  * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.  Since
148  * the cothread cannot free transmit mbufs after processing, we put them on
149  * the done fifo so the kernel can free them.
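 *
 * This runs in the transmit cothread (a separate host thread), so the
 * usleep() poll below simply waits for vke_tx_intr() on the kernel side
 * to drain already-transmitted mbufs and free up a slot.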
150  */
151 static int
152 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
153 {
154 	fifo_t fifo = sc->sc_txfifo_done;
155 
156 	while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) {
157 		usleep(20000);
158 	}
159 
160 	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
161 	cpu_sfence();
162 	++fifo->windex;
163 	return (0);
164 }
165 
166 /*
167  * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
168  */
169 static struct mbuf *
170 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
171 {
172 	fifo_t fifo = sc->sc_txfifo_done;
173 	struct mbuf *m;
174 
175 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
176 		return (NULL);
177 
178 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
179 	fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
180 	cpu_lfence();
181 	++fifo->rindex;
182 	return (m);
183 }
184 
185 /*
186  * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
187  */
188 static int
189 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
190 {
191 	fifo_t fifo = sc->sc_txfifo;
192 
193 	if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
194 		return (-1);
195 
196 	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
197 	cpu_sfence();
198 	++fifo->windex;
199 
200 	return (0);
201 }
202 
203 /*
204  * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
205  * exists.
206  */
207 static struct mbuf *
208 vke_txfifo_dequeue(struct vke_softc *sc)
209 {
210 	fifo_t fifo = sc->sc_txfifo;
211 	struct mbuf *m;
212 
213 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
214 		return (NULL);
215 
216 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
217 	fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
218 
219 	cpu_lfence();
220 	++fifo->rindex;
221 	return (m);
222 }
223 
224 static int
225 vke_txfifo_empty(struct vke_softc *sc)
226 {
227 	fifo_t fifo = sc->sc_txfifo;
228 
229 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
230 		return (1);
231 	return(0);
232 }
233 
234 /*
235  * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo if one
236  * exists, replacing it with newm, which should point to a newly allocated
237  * mbuf.
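 *
 * Handing back a fresh mbuf in the slot we empty means the receive
 * cothread always has a buffer ready to read the next packet into.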
238  */
239 static struct mbuf *
240 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
241 {
242 	fifo_t fifo = sc->sc_rxfifo;
243 	struct mbuf *m;
244 
245 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
246 		return (NULL);
247 
248 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
249 	fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
250 	cpu_lfence();
251 	++fifo->rindex;
252 	return (m);
253 }
254 
255 /*
256  * Return the next mbuf if available but do NOT remove it from the FIFO.
257  */
258 static struct mbuf *
259 vke_rxfifo_sniff(struct vke_softc *sc)
260 {
261 	fifo_t fifo = sc->sc_rxfifo;
262 	struct mbuf *m;
263 
264 	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
265 		return (NULL);
266 
267 	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
268 	cpu_lfence();
269 	return (m);
270 }
271 
272 static void
273 vke_init(void *xsc)
274 {
275 	struct vke_softc *sc = xsc;
276 	struct ifnet *ifp = &sc->arpcom.ac_if;
277 	int i;
278 
279 	ASSERT_SERIALIZED(ifp->if_serializer);
280 
281 	vke_stop(sc);
282 
283 	ifp->if_flags |= IFF_RUNNING;
284 	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
285 
286 	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
287 	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
288 
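	/*
	 * Pre-load every RX slot with an empty cluster mbuf; the receive
	 * cothread reads packets from the backend directly into these.
	 */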
289 	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
290 	for (i = 0; i < NETFIFOSIZE; i++) {
291 		sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
292 		sc->sc_txfifo->array[i] = NULL;
293 		sc->sc_txfifo_done->array[i] = NULL;
294 	}
295 
296 	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
297 	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
298 	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
299 
300 	if (sc->sc_addr != 0) {
301 		in_addr_t addr, mask;
302 
303 		addr = sc->sc_addr;
304 		mask = sc->sc_mask;
305 
306 		/*
307 		 * Make sure the vkernel-assigned
308 		 * address will not be added
309 		 * again.
310 		 */
311 		sc->sc_addr = 0;
312 		sc->sc_mask = 0;
313 
314 		vke_init_addr(ifp, addr, mask);
315 	}
316 
317 }
318 
319 /*
320  * Called from kernel.
321  *
322  * NOTE: We can't make any kernel callbacks while holding cothread lock
323  *	 because the cothread lock is not governed by the kernel scheduler
324  *	 (so mplock, tokens, etc will not be released).
325  */
326 static void
327 vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
328 {
329 	struct vke_softc *sc = ifp->if_softc;
330 	struct mbuf *m;
331 	cothread_t cotd = sc->cotd_tx;
332 	int count;
333 
334 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
335 	ASSERT_SERIALIZED(ifp->if_serializer);
336 
337 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
338 		return;
339 
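	/*
	 * Drain the ifq into the transmit fifo, poking the tx cothread
	 * every VKE_CHUNK packets so it can start writing while we keep
	 * queueing.  Packets that do not fit into the fifo are dropped.
	 */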
340 	count = 0;
341 	while ((m = ifsq_dequeue(ifsq, NULL)) != NULL) {
342 		if (vke_txfifo_enqueue(sc, m) != -1) {
343 			if (count++ == VKE_CHUNK) {
344 				cothread_lock(cotd, 0);
345 				cothread_signal(cotd);
346 				cothread_unlock(cotd, 0);
347 				count = 0;
348 			}
349 		} else {
350 			m_freem(m);
351 		}
352 	}
353 	if (count) {
354 		cothread_lock(cotd, 0);
355 		cothread_signal(cotd);
356 		cothread_unlock(cotd, 0);
357 	}
358 }
359 
360 static int
361 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
362 {
363 	struct vke_softc *sc = ifp->if_softc;
364 	int error = 0;
365 
366 	ASSERT_SERIALIZED(ifp->if_serializer);
367 
368 	switch (cmd) {
369 	case SIOCSIFFLAGS:
370 		if (ifp->if_flags & IFF_UP) {
371 			if ((ifp->if_flags & IFF_RUNNING) == 0)
372 				vke_init(sc);
373 		} else {
374 			if (ifp->if_flags & IFF_RUNNING)
375 				vke_stop(sc);
376 		}
377 		break;
378 	case SIOCGIFMEDIA:
379 	case SIOCSIFMEDIA:
380 		error = EOPNOTSUPP;
381 		/* TODO */
382 		break;
383 	case SIOCGIFSTATUS: {
384 		struct ifstat *ifs = (struct ifstat *)data;
385 		int len;
386 
387 		len = strlen(ifs->ascii);
388 		if (len < sizeof(ifs->ascii)) {
389 			ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
390 				  "\tBacked by tap%d\n", sc->sc_tap_unit);
391 		}
392 		break;
393 	}
394 	case SIOCSIFADDR:
395 		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
396 			/*
397 			 * If we are explicitly requested to change address,
398 			 * we should invalidate address/netmask passed in
399 			 * from vkernel command line.
400 			 */
401 			sc->sc_addr = 0;
402 			sc->sc_mask = 0;
403 		}
404 		/* FALL THROUGH */
405 	default:
406 		error = ether_ioctl(ifp, cmd, data);
407 		break;
408 	}
409 	return error;
410 }
411 
412 static int
413 vke_stop(struct vke_softc *sc)
414 {
415 	struct ifnet *ifp = &sc->arpcom.ac_if;
416 	int i;
417 
418 	ASSERT_SERIALIZED(ifp->if_serializer);
419 
420 	ifp->if_flags &= ~IFF_RUNNING;
421 	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
422 
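	/*
	 * Ask both cothreads to exit and tear them down before any of the
	 * fifos or their mbufs are freed, so neither thread can touch
	 * freed memory.
	 */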
423 	if (sc) {
424 		if (sc->cotd_tx) {
425 			cothread_lock(sc->cotd_tx, 0);
426 			if (sc->cotd_tx_exit == VKE_COTD_RUN)
427 				sc->cotd_tx_exit = VKE_COTD_EXIT;
428 			cothread_signal(sc->cotd_tx);
429 			cothread_unlock(sc->cotd_tx, 0);
430 			cothread_delete(&sc->cotd_tx);
431 		}
432 		if (sc->cotd_rx) {
433 			cothread_lock(sc->cotd_rx, 0);
434 			if (sc->cotd_rx_exit == VKE_COTD_RUN)
435 				sc->cotd_rx_exit = VKE_COTD_EXIT;
436 			cothread_signal(sc->cotd_rx);
437 			cothread_unlock(sc->cotd_rx, 0);
438 			cothread_delete(&sc->cotd_rx);
439 		}
440 
441 		for (i = 0; i < NETFIFOSIZE; i++) {
442 			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
443 				m_freem(sc->sc_rxfifo->array[i]);
444 				sc->sc_rxfifo->array[i] = NULL;
445 			}
446 			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
447 				m_freem(sc->sc_txfifo->array[i]);
448 				sc->sc_txfifo->array[i] = NULL;
449 			}
450 			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
451 				m_freem(sc->sc_txfifo_done->array[i]);
452 				sc->sc_txfifo_done->array[i] = NULL;
453 			}
454 		}
455 
456 		if (sc->sc_txfifo) {
457 			kfree(sc->sc_txfifo, M_DEVBUF);
458 			sc->sc_txfifo = NULL;
459 		}
460 
461 		if (sc->sc_txfifo_done) {
462 			kfree(sc->sc_txfifo_done, M_DEVBUF);
463 			sc->sc_txfifo_done = NULL;
464 		}
465 
466 		if (sc->sc_rxfifo) {
467 			kfree(sc->sc_rxfifo, M_DEVBUF);
468 			sc->sc_rxfifo = NULL;
469 		}
470 	}
471 
472 
473 	return 0;
474 }
475 
476 /*
477  * vke_rx_intr() is the interrupt function for the receive cothread.
478  */
479 static void
480 vke_rx_intr(cothread_t cotd)
481 {
482 	struct mbuf *m;
483 	struct mbuf *nm;
484 	struct vke_softc *sc = cotd->arg;
485 	struct ifnet *ifp = &sc->arpcom.ac_if;
486 	static int count = 0;
487 
488 	ifnet_serialize_all(ifp);
489 	cothread_lock(cotd, 0);
490 
491 	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
492 		cothread_unlock(cotd, 0);
493 		ifnet_deserialize_all(ifp);
494 		return;
495 	}
496 	cothread_unlock(cotd, 0);
497 
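	/*
	 * For each packet the cothread has queued: peek at it, allocate a
	 * replacement cluster mbuf, swap it into the fifo slot and hand
	 * the full packet to the stack.  If no replacement can be
	 * allocated the packet is dropped by recycling its mbuf back into
	 * the slot.
	 */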
498 	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
499 		nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
500 		if (nm) {
501 			vke_rxfifo_dequeue(sc, nm);
502 			ifp->if_input(ifp, m);
503 			if (count++ == VKE_CHUNK) {
504 				cothread_lock(cotd, 0);
505 				cothread_signal(cotd);
506 				cothread_unlock(cotd, 0);
507 				count = 0;
508 			}
509 		} else {
510 			vke_rxfifo_dequeue(sc, m);
511 		}
512 	}
513 
514 	if (count) {
515 		cothread_lock(cotd, 0);
516 		cothread_signal(cotd);
517 		cothread_unlock(cotd, 0);
518 	}
519 	ifnet_deserialize_all(ifp);
520 }
521 
522 /*
523  * vke_tx_intr() is the interrupt function for the transmit cothread.
524  * Calls vke_start() to handle processing transmit mbufs.
525  */
526 static void
527 vke_tx_intr(cothread_t cotd)
528 {
529 	struct vke_softc *sc = cotd->arg;
530 	struct ifnet *ifp = &sc->arpcom.ac_if;
531 	struct mbuf *m;
532 
533 	ifnet_serialize_all(ifp);
534 	cothread_lock(cotd, 0);
535 	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
536 		cothread_unlock(cotd, 0);
537 		ifnet_deserialize_all(ifp);
538 		return;
539 	}
540 	cothread_unlock(cotd, 0);
541 
542 	/*
543 	 * Free TX mbufs that have already been processed before starting
544 	 * new ones, to keep the transmit pipeline flowing.
545 	 */
546 	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
547 		m_freem(m);
548 	}
549 
550 	if ((ifp->if_flags & IFF_RUNNING) == 0)
551 		if_devstart(ifp);
552 
553 	ifnet_deserialize_all(ifp);
554 }
555 
556 /*
557  * vke_rx_thread() is the body of the receive cothread.
558  */
559 static void
560 vke_rx_thread(cothread_t cotd)
561 {
562 	struct mbuf *m;
563 	struct vke_softc *sc = cotd->arg;
564 	struct ifnet *ifp = &sc->arpcom.ac_if;
565 	fifo_t fifo = sc->sc_rxfifo;
566 	fd_set fdset;
567 	struct timeval tv;
568 	int count;
569 	int n;
570 
571 	/* Select timeout cannot be infinite since we need to check for
572 	 * the exit flag sc->cotd_rx_exit.
573 	 */
574 	tv.tv_sec = 0;
575 	tv.tv_usec = 500000;
576 
577 	FD_ZERO(&fdset);
578 	count = 0;
579 
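	/*
	 * The backend descriptor is presumably non-blocking: when read()
	 * returns no data we fall back to select() with the timeout above
	 * so the exit flag is still polled on an idle link.
	 */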
580 	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
581 		/*
582 		 * Wait for the RX FIFO to be loaded with
583 		 * empty mbufs.
584 		 */
585 		if (NETFIFOINDEX(fifo->windex + 1) ==
586 		    NETFIFOINDEX(fifo->rindex)) {
587 			usleep(20000);
588 			continue;
589 		}
590 
591 		/*
592 		 * Load data into the rx fifo
593 		 */
594 		m = fifo->array[NETFIFOINDEX(fifo->windex)];
595 		if (m == NULL)
596 			continue;
597 		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
598 		if (n > 0) {
599 			ifp->if_ipackets++;
600 			m->m_pkthdr.rcvif = ifp;
601 			m->m_pkthdr.len = m->m_len = n;
602 			cpu_sfence();
603 			++fifo->windex;
604 			if (count++ == VKE_CHUNK) {
605 				cothread_intr(cotd);
606 				count = 0;
607 			}
608 		} else {
609 			if (count) {
610 				cothread_intr(cotd);
611 				count = 0;
612 			}
613 			FD_SET(sc->sc_fd, &fdset);
614 
615 			if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
616 				kprintf(VKE_DEVNAME "%d: select failed for "
617 					"TAP device\n", sc->sc_unit);
618 				usleep(1000000);
619 			}
620 		}
621 	}
622 	cpu_sfence();
623 	sc->cotd_rx_exit = VKE_COTD_DEAD;
624 }
625 
626 /*
627  * vke_tx_thread() is the body of the transmit cothread.
628  */
629 static void
630 vke_tx_thread(cothread_t cotd)
631 {
632 	struct mbuf *m;
633 	struct vke_softc *sc = cotd->arg;
634 	struct ifnet *ifp = &sc->arpcom.ac_if;
635 	int count = 0;
636 
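	/*
	 * Each outgoing mbuf chain is flattened into the contiguous
	 * sc_txbuf bounce buffer because write(2) on the backend wants a
	 * single buffer.  Chains longer than MCLBYTES are quietly skipped
	 * and handed straight to the done fifo.
	 */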
637 	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
638 		/*
639 		 * Write outgoing packets to the TAP interface
640 		 */
641 		m = vke_txfifo_dequeue(sc);
642 		if (m) {
643 			if (m->m_pkthdr.len <= MCLBYTES) {
644 				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
645 				sc->sc_txbuf_len = m->m_pkthdr.len;
646 
647 				if (write(sc->sc_fd, sc->sc_txbuf,
648 					  sc->sc_txbuf_len) < 0) {
649 					ifp->if_oerrors++;
650 				} else {
651 					ifp->if_opackets++;
652 				}
653 			}
654 			if (count++ == VKE_CHUNK) {
655 				cothread_intr(cotd);
656 				count = 0;
657 			}
658 			vke_txfifo_done_enqueue(sc, m);
659 		} else {
660 			if (count) {
661 				cothread_intr(cotd);
662 				count = 0;
663 			}
664 			cothread_lock(cotd, 1);
665 			if (vke_txfifo_empty(sc))
666 				cothread_wait(cotd);
667 			cothread_unlock(cotd, 1);
668 		}
669 	}
670 	cpu_sfence();
671 	sc->cotd_tx_exit = VKE_COTD_DEAD;
672 }
673 
674 static int
675 vke_attach(const struct vknetif_info *info, int unit)
676 {
677 	struct vke_softc *sc;
678 	struct ifnet *ifp;
679 	struct tapinfo tapinfo;
680 	uint8_t enaddr[ETHER_ADDR_LEN];
681 	int fd;
682 
683 	KKASSERT(info->tap_fd >= 0);
684 	fd = info->tap_fd;
685 
686 	/*
687 	 * This is only a TAP device if tap_unit is non-negative.  If
688 	 * connecting to a virtual socket, we generate a unique MAC.
689 	 */
690 	if (info->tap_unit >= 0) {
691 		if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
692 			kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
693 				"failed: %s\n", unit, strerror(errno));
694 			return ENXIO;
695 		}
696 
697 		if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
698 			kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
699 				"failed: %s\n", unit, strerror(errno));
700 			return ENXIO;
701 		}
702 	} else {
703 		int fd = open("/dev/urandom", O_RDONLY);
704 		if (fd >= 0) {
705 			read(fd, enaddr + 2, 4);
706 			close(fd);
707 		}
708 		enaddr[4] = (int)getpid() >> 8;
709 		enaddr[5] = (int)getpid() & 255;
710 
711 	}
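	/* Presumably bumped so the MAC differs from the backend tap's. */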
712 	enaddr[1] += 1;
713 
714 	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
715 
716 	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
717 	sc->sc_fd = fd;
718 	sc->sc_unit = unit;
719 	sc->sc_tap_unit = info->tap_unit;
720 	sc->sc_addr = info->netif_addr;
721 	sc->sc_mask = info->netif_mask;
722 
723 	ifp = &sc->arpcom.ac_if;
724 	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
725 
726 	/* NB: after if_initname() */
727 	sysctl_ctx_init(&sc->sc_sysctl_ctx);
728 	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
729 					     SYSCTL_STATIC_CHILDREN(_hw),
730 					     OID_AUTO, ifp->if_xname,
731 					     CTLFLAG_RD, 0, "");
732 	if (sc->sc_sysctl_tree == NULL) {
733 		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
734 	} else {
735 		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
736 			       SYSCTL_CHILDREN(sc->sc_sysctl_tree),
737 			       OID_AUTO, "tap_unit",
738 			       CTLFLAG_RD, &sc->sc_tap_unit, 0,
739 			       "Backend tap(4) unit");
740 	}
741 
742 	ifp->if_softc = sc;
743 	ifp->if_ioctl = vke_ioctl;
744 	ifp->if_start = vke_start;
745 	ifp->if_init = vke_init;
746 	ifp->if_mtu = tapinfo.mtu;
747 	ifp->if_baudrate = tapinfo.baudrate;
748 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
749 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
750 	ifq_set_ready(&ifp->if_snd);
751 
752 	/* TODO: if_media */
753 
754 	ether_ifattach(ifp, enaddr, NULL);
755 
756 	if (bootverbose && sc->sc_addr != 0) {
757 		if_printf(ifp, "pre-configured "
758 			  "address 0x%08x, netmask 0x%08x\n",
759 			  ntohl(sc->sc_addr), ntohl(sc->sc_mask));
760 	}
761 
762 	return 0;
763 }
764 
765 static int
766 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
767 {
768 	struct ifaliasreq ifra;
769 	struct sockaddr_in *sin;
770 	int ret;
771 
772 	ASSERT_SERIALIZED(ifp->if_serializer);
773 
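	/*
	 * Roughly what "ifconfig vkeN <addr> netmask <mask>" would do:
	 * build an ifaliasreq by hand and feed it to in_control() with
	 * SIOCAIFADDR below.
	 */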
774 	if (bootverbose) {
775 		if_printf(ifp, "add pre-configured "
776 			  "address 0x%08x, netmask 0x%08x\n",
777 			  ntohl(addr), ntohl(mask));
778 	}
779 
780 	bzero(&ifra, sizeof(ifra));
781 
782 	/* NB: no need to set ifaliasreq.ifra_name */
783 
784 	sin = (struct sockaddr_in *)&ifra.ifra_addr;
785 	sin->sin_family = AF_INET;
786 	sin->sin_len = sizeof(*sin);
787 	sin->sin_addr.s_addr = addr;
788 
789 	if (mask != 0) {
790 		sin = (struct sockaddr_in *)&ifra.ifra_mask;
791 		sin->sin_len = sizeof(*sin);
792 		sin->sin_addr.s_addr = mask;
793 	}
794 
795 	/*
796 	 * Temporarily release serializer, in_control() will hold
797 	 * it again before calling ifnet.if_ioctl().
798 	 */
799 	ifnet_deserialize_all(ifp);
800 	ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
801 	ifnet_serialize_all(ifp);
802 
803 	return ret;
804 }
805