xref: /netbsd-src/sys/net/if_tun.c (revision fd5cb0acea84d278e04e640d37ca2398f894991f)
1 /*	$NetBSD: if_tun.c,v 1.76 2005/01/24 21:25:09 matt Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5  * Nottingham University 1987.
6  *
7  * This source may be freely distributed, however I would be interested
8  * in any changes that are made.
9  *
10  * This driver takes packets off the IP i/f and hands them up to a
11  * user process to have its wicked way with. This driver has its
12  * roots in a similar driver written by Phil Cockcroft (formerly) at
13  * UCL. This driver is based much more on read/write/poll mode of
14  * operation though.
15  */
16 
17 #include <sys/cdefs.h>
18 __KERNEL_RCSID(0, "$NetBSD: if_tun.c,v 1.76 2005/01/24 21:25:09 matt Exp $");
19 
20 #include "opt_inet.h"
21 #include "opt_ns.h"
22 
23 #include <sys/param.h>
24 #include <sys/proc.h>
25 #include <sys/systm.h>
26 #include <sys/mbuf.h>
27 #include <sys/buf.h>
28 #include <sys/protosw.h>
29 #include <sys/socket.h>
30 #include <sys/ioctl.h>
31 #include <sys/errno.h>
32 #include <sys/syslog.h>
33 #include <sys/select.h>
34 #include <sys/poll.h>
35 #include <sys/file.h>
36 #include <sys/signalvar.h>
37 #include <sys/conf.h>
38 
39 #include <machine/cpu.h>
40 
41 #include <net/if.h>
42 #include <net/if_types.h>
43 #include <net/netisr.h>
44 #include <net/route.h>
45 
46 
47 #ifdef INET
48 #include <netinet/in.h>
49 #include <netinet/in_systm.h>
50 #include <netinet/in_var.h>
51 #include <netinet/ip.h>
52 #include <netinet/if_inarp.h>
53 #endif
54 
55 #ifdef NS
56 #include <netns/ns.h>
57 #include <netns/ns_if.h>
58 #endif
59 
60 #include "bpfilter.h"
61 #if NBPFILTER > 0
62 #include <sys/time.h>
63 #include <net/bpf.h>
64 #endif
65 
66 #include <net/if_tun.h>
67 
/*
 * Debug printf.  NOTE: this expands to a bare "if (tundebug) printf", so it
 * must only be used as a full statement inside braces; placing it directly
 * before an "else" would capture that else.
 */
#define TUNDEBUG	if (tundebug) printf
/* Run-time debug switch; read and written through TUNGDEBUG/TUNSDEBUG. */
int	tundebug = 0;

extern int ifqmaxlen;
void	tunattach __P((int));
/* Active instances (inserted by tun_clone_create). */
LIST_HEAD(, tun_softc) tun_softc_list;
/* "Zombie" instances: destroyed while the device was still open. */
LIST_HEAD(, tun_softc) tunz_softc_list;
/* Held while either of the two lists above is walked or modified. */
static struct simplelock tun_softc_lock;

/* Network-interface side entry points (installed in the ifnet). */
int	tun_ioctl __P((struct ifnet *, u_long, caddr_t));
int	tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
		       struct rtentry *rt));
/* Interface cloner callbacks. */
int	tun_clone_create __P((struct if_clone *, int));
int	tun_clone_destroy __P((struct ifnet *));

/* Cloner registered by tunattach() under the name "tun". */
struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

/* File-local helpers. */
static void tunattach0 __P((struct tun_softc *));
static void tuninit __P((struct tun_softc *));
#ifdef ALTQ
static void tunstart __P((struct ifnet *));
#endif
static struct tun_softc *tun_find_unit __P((dev_t));
static struct tun_softc *tun_find_zunit __P((int));
93 
/* Character-device side entry points, declared via the dev_type_* macros. */
dev_type_open(tunopen);
dev_type_close(tunclose);
dev_type_read(tunread);
dev_type_write(tunwrite);
dev_type_ioctl(tunioctl);
dev_type_poll(tunpoll);
dev_type_kqfilter(tunkqfilter);

/*
 * Character device switch for the tunnel device.  Stop, tty and mmap are
 * filled with the no* placeholders; everything else is implemented below.
 */
const struct cdevsw tun_cdevsw = {
	tunopen, tunclose, tunread, tunwrite, tunioctl,
	nostop, notty, tunpoll, nommap, tunkqfilter,
};
106 
107 void
108 tunattach(unused)
109 	int unused;
110 {
111 
112 	simple_lock_init(&tun_softc_lock);
113 	LIST_INIT(&tun_softc_list);
114 	LIST_INIT(&tunz_softc_list);
115 	if_clone_attach(&tun_cloner);
116 }
117 
118 /*
119  * Find driver instance from dev_t.
120  * Call at splnet().
121  * Returns with tp locked (if found).
122  */
123 static struct tun_softc *
124 tun_find_unit(dev)
125 	dev_t dev;
126 {
127 	struct tun_softc *tp;
128 	int unit = minor(dev);
129 
130 	simple_lock(&tun_softc_lock);
131 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
132 		if (unit == tp->tun_unit)
133 			break;
134 	if (tp)
135 		simple_lock(&tp->tun_lock);
136 	simple_unlock(&tun_softc_lock);
137 
138 	return (tp);
139 }
140 
141 /*
142  * Find zombie driver instance by unit number.
143  * Call at splnet().
144  * Remove tp from list and return it unlocked (if found).
145  */
146 static struct tun_softc *
147 tun_find_zunit(unit)
148 	int unit;
149 {
150 	struct tun_softc *tp;
151 
152 	simple_lock(&tun_softc_lock);
153 	LIST_FOREACH(tp, &tunz_softc_list, tun_list)
154 		if (unit == tp->tun_unit)
155 			break;
156 	if (tp)
157 		LIST_REMOVE(tp, tun_list);
158 	simple_unlock(&tun_softc_lock);
159 #ifdef DIAGNOSTIC
160 	if (tp != NULL && (tp->tun_flags & (TUN_INITED|TUN_OPEN)) != TUN_OPEN)
161 		printf("tun%d: inconsistent flags: %x\n", unit, tp->tun_flags);
162 #endif
163 
164 	return (tp);
165 }
166 
167 int
168 tun_clone_create(ifc, unit)
169 	struct if_clone *ifc;
170 	int unit;
171 {
172 	struct tun_softc *tp;
173 
174 	if ((tp = tun_find_zunit(unit)) == NULL) {
175 		/* Allocate a new instance */
176 		tp = malloc(sizeof(struct tun_softc), M_DEVBUF, M_WAITOK);
177 		(void)memset(tp, 0, sizeof(struct tun_softc));
178 
179 		tp->tun_unit = unit;
180 		simple_lock_init(&tp->tun_lock);
181 	} else {
182 		/* Revive tunnel instance; clear ifp part */
183 		(void)memset(&tp->tun_if, 0, sizeof(struct ifnet));
184 	}
185 
186 	(void)snprintf(tp->tun_if.if_xname, sizeof(tp->tun_if.if_xname),
187 			"%s%d", ifc->ifc_name, unit);
188 	tunattach0(tp);
189 	tp->tun_flags |= TUN_INITED;
190 
191 	simple_lock(&tun_softc_lock);
192 	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
193 	simple_unlock(&tun_softc_lock);
194 
195 	return (0);
196 }
197 
198 void
199 tunattach0(tp)
200 	struct tun_softc *tp;
201 {
202 	struct ifnet *ifp;
203 
204 	ifp = &tp->tun_if;
205 	ifp->if_softc = tp;
206 	ifp->if_mtu = TUNMTU;
207 	ifp->if_ioctl = tun_ioctl;
208 	ifp->if_output = tun_output;
209 #ifdef ALTQ
210 	ifp->if_start = tunstart;
211 #endif
212 	ifp->if_flags = IFF_POINTOPOINT;
213 	ifp->if_type = IFT_TUNNEL;
214 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
215 	ifp->if_collisions = 0;
216 	ifp->if_ierrors = 0;
217 	ifp->if_oerrors = 0;
218 	ifp->if_ipackets = 0;
219 	ifp->if_opackets = 0;
220 	ifp->if_ibytes   = 0;
221 	ifp->if_obytes   = 0;
222 	ifp->if_dlt = DLT_NULL;
223 	IFQ_SET_READY(&ifp->if_snd);
224 	if_attach(ifp);
225 	if_alloc_sadl(ifp);
226 #if NBPFILTER > 0
227 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
228 #endif
229 }
230 
231 int
232 tun_clone_destroy(ifp)
233 	struct ifnet *ifp;
234 {
235 	struct tun_softc *tp = (void *)ifp;
236 	int s, zombie = 0;
237 
238 	s = splnet();
239 	simple_lock(&tun_softc_lock);
240 	simple_lock(&tp->tun_lock);
241 	LIST_REMOVE(tp, tun_list);
242 	if (tp->tun_flags & TUN_OPEN) {
243 		/* Hang on to storage until last close */
244 		zombie = 1;
245 		tp->tun_flags &= ~TUN_INITED;
246 		LIST_INSERT_HEAD(&tunz_softc_list, tp, tun_list);
247 	}
248 	simple_unlock(&tun_softc_lock);
249 
250 	IF_PURGE(&ifp->if_snd);
251 	ifp->if_flags &= ~IFF_RUNNING;
252 
253 	if (tp->tun_flags & TUN_RWAIT) {
254 		tp->tun_flags &= ~TUN_RWAIT;
255 		wakeup((caddr_t)tp);
256 	}
257 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
258 		fownsignal(tp->tun_pgid, SIGIO, POLL_HUP, 0, NULL);
259 
260 	selwakeup(&tp->tun_rsel);
261 
262 	simple_unlock(&tp->tun_lock);
263 	splx(s);
264 
265 #if NBPFILTER > 0
266 	bpfdetach(ifp);
267 #endif
268 	if_detach(ifp);
269 
270 	if (!zombie)
271 		free(tp, M_DEVBUF);
272 
273 	return (0);
274 }
275 
276 /*
277  * tunnel open - must be superuser & the device must be
278  * configured in
279  */
280 int
281 tunopen(dev, flag, mode, p)
282 	dev_t	dev;
283 	int	flag, mode;
284 	struct proc *p;
285 {
286 	struct ifnet	*ifp;
287 	struct tun_softc *tp;
288 	int	s, error;
289 
290 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
291 		return (error);
292 
293 	s = splnet();
294 	tp = tun_find_unit(dev);
295 
296 	if (tp == NULL) {
297 		(void)tun_clone_create(&tun_cloner, minor(dev));
298 		tp = tun_find_unit(dev);
299 		if (tp == NULL) {
300 			error = ENXIO;
301 			goto out_nolock;
302 		}
303 	}
304 
305 	if (tp->tun_flags & TUN_OPEN) {
306 		error = EBUSY;
307 		goto out;
308 	}
309 
310 	ifp = &tp->tun_if;
311 	tp->tun_flags |= TUN_OPEN;
312 	TUNDEBUG("%s: open\n", ifp->if_xname);
313 out:
314 	simple_unlock(&tp->tun_lock);
315 out_nolock:
316 	splx(s);
317 	return (error);
318 }
319 
320 /*
321  * tunclose - close the device - mark i/f down & delete
322  * routing info
323  */
324 int
325 tunclose(dev, flag, mode, p)
326 	dev_t	dev;
327 	int	flag;
328 	int	mode;
329 	struct proc *p;
330 {
331 	int	s;
332 	struct tun_softc *tp;
333 	struct ifnet	*ifp;
334 
335 	s = splnet();
336 	if ((tp = tun_find_zunit(minor(dev))) != NULL) {
337 		/* interface was "destroyed" before the close */
338 		free(tp, M_DEVBUF);
339 		goto out_nolock;
340 	}
341 
342 	if ((tp = tun_find_unit(dev)) == NULL)
343 		goto out_nolock;
344 
345 	ifp = &tp->tun_if;
346 
347 	tp->tun_flags &= ~TUN_OPEN;
348 
349 	/*
350 	 * junk all pending output
351 	 */
352 	IFQ_PURGE(&ifp->if_snd);
353 
354 	if (ifp->if_flags & IFF_UP) {
355 		if_down(ifp);
356 		if (ifp->if_flags & IFF_RUNNING) {
357 			/* find internet addresses and delete routes */
358 			struct ifaddr *ifa;
359 			IFADDR_FOREACH(ifa, ifp) {
360 #ifdef INET
361 				if (ifa->ifa_addr->sa_family == AF_INET) {
362 					rtinit(ifa, (int)RTM_DELETE,
363 					       tp->tun_flags & TUN_DSTADDR
364 							? RTF_HOST
365 							: 0);
366 				}
367 #endif
368 			}
369 		}
370 	}
371 	tp->tun_pgid = 0;
372 	selnotify(&tp->tun_rsel, 0);
373 
374 	TUNDEBUG ("%s: closed\n", ifp->if_xname);
375 	simple_unlock(&tp->tun_lock);
376 out_nolock:
377 	splx(s);
378 	return (0);
379 }
380 
381 /*
382  * Call at splnet() with tp locked.
383  */
384 static void
385 tuninit(tp)
386 	struct tun_softc *tp;
387 {
388 	struct ifnet	*ifp = &tp->tun_if;
389 	struct ifaddr	*ifa;
390 
391 	TUNDEBUG("%s: tuninit\n", ifp->if_xname);
392 
393 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
394 
395 	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
396 	IFADDR_FOREACH(ifa, ifp) {
397 #ifdef INET
398 		if (ifa->ifa_addr->sa_family == AF_INET) {
399 			struct sockaddr_in *sin;
400 
401 			sin = satosin(ifa->ifa_addr);
402 			if (sin && sin->sin_addr.s_addr)
403 				tp->tun_flags |= TUN_IASET;
404 
405 			if (ifp->if_flags & IFF_POINTOPOINT) {
406 				sin = satosin(ifa->ifa_dstaddr);
407 				if (sin && sin->sin_addr.s_addr)
408 					tp->tun_flags |= TUN_DSTADDR;
409 			}
410 		}
411 #endif
412 	}
413 
414 	return;
415 }
416 
417 /*
418  * Process an ioctl request.
419  */
420 int
421 tun_ioctl(ifp, cmd, data)
422 	struct ifnet *ifp;
423 	u_long cmd;
424 	caddr_t	data;
425 {
426 	int		error = 0, s;
427 	struct tun_softc *tp = (struct tun_softc *)(ifp->if_softc);
428 
429 	s = splnet();
430 	simple_lock(&tp->tun_lock);
431 
432 	switch (cmd) {
433 	case SIOCSIFADDR:
434 		tuninit(tp);
435 		TUNDEBUG("%s: address set\n", ifp->if_xname);
436 		break;
437 	case SIOCSIFDSTADDR:
438 		tuninit(tp);
439 		TUNDEBUG("%s: destination address set\n", ifp->if_xname);
440 		break;
441 	case SIOCSIFBRDADDR:
442 		TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
443 		break;
444 	case SIOCSIFMTU: {
445 		struct ifreq *ifr = (struct ifreq *) data;
446 		if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
447 		    error = EINVAL;
448 		    break;
449 		}
450 		TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
451 		ifp->if_mtu = ifr->ifr_mtu;
452 		break;
453 	}
454 	case SIOCADDMULTI:
455 	case SIOCDELMULTI: {
456 		struct ifreq *ifr = (struct ifreq *) data;
457 		if (ifr == 0) {
458 	        	error = EAFNOSUPPORT;           /* XXX */
459 			break;
460 		}
461 		switch (ifr->ifr_addr.sa_family) {
462 #ifdef INET
463 		case AF_INET:
464 			break;
465 #endif
466 		default:
467 			error = EAFNOSUPPORT;
468 			break;
469 		}
470 		break;
471 	}
472 	case SIOCSIFFLAGS:
473 		break;
474 	default:
475 		error = EINVAL;
476 	}
477 
478 	simple_unlock(&tp->tun_lock);
479 	splx(s);
480 	return (error);
481 }
482 
483 /*
484  * tun_output - queue packets from higher level ready to put out.
485  */
486 int
487 tun_output(ifp, m0, dst, rt)
488 	struct ifnet   *ifp;
489 	struct mbuf    *m0;
490 	struct sockaddr *dst;
491 	struct rtentry *rt;
492 {
493 	struct tun_softc *tp = ifp->if_softc;
494 	int		s;
495 	int		error;
496 #ifdef INET
497 	int		mlen;
498 #endif
499 	ALTQ_DECL(struct altq_pktattr pktattr;)
500 
501 	s = splnet();
502 	simple_lock(&tp->tun_lock);
503 	TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
504 
505 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
506 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
507 			  tp->tun_flags);
508 		m_freem (m0);
509 		error = EHOSTDOWN;
510 		goto out;
511 	}
512 
513 	/*
514 	 * if the queueing discipline needs packet classification,
515 	 * do it before prepending link headers.
516 	 */
517 	IFQ_CLASSIFY(&ifp->if_snd, m0, dst->sa_family, &pktattr);
518 
519 #if NBPFILTER > 0
520 	if (ifp->if_bpf)
521 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m0);
522 #endif
523 
524 	switch(dst->sa_family) {
525 #ifdef INET
526 	case AF_INET:
527 		if (tp->tun_flags & TUN_PREPADDR) {
528 			/* Simple link-layer header */
529 			M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
530 			if (m0 == NULL) {
531 				IF_DROP(&ifp->if_snd);
532 				error = ENOBUFS;
533 				goto out;
534 			}
535 			bcopy(dst, mtod(m0, char *), dst->sa_len);
536 		}
537 		/* FALLTHROUGH */
538 	case AF_UNSPEC:
539 		IFQ_ENQUEUE(&ifp->if_snd, m0, &pktattr, error);
540 		if (error) {
541 			ifp->if_collisions++;
542 			error = EAFNOSUPPORT;
543 			goto out;
544 		}
545 		mlen = m0->m_pkthdr.len;
546 		ifp->if_opackets++;
547 		ifp->if_obytes += mlen;
548 		break;
549 #endif
550 	default:
551 		m_freem(m0);
552 		error = EAFNOSUPPORT;
553 		goto out;
554 	}
555 
556 	if (tp->tun_flags & TUN_RWAIT) {
557 		tp->tun_flags &= ~TUN_RWAIT;
558 		wakeup((caddr_t)tp);
559 	}
560 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
561 		fownsignal(tp->tun_pgid, SIGIO, POLL_IN, POLLIN|POLLRDNORM,
562 		    NULL);
563 
564 	selnotify(&tp->tun_rsel, 0);
565 out:
566 	simple_unlock(&tp->tun_lock);
567 	splx(s);
568 	return (0);
569 }
570 
571 /*
572  * the cdevsw interface is now pretty minimal.
573  */
574 int
575 tunioctl(dev, cmd, data, flag, p)
576 	dev_t		dev;
577 	u_long		cmd;
578 	caddr_t		data;
579 	int		flag;
580 	struct proc	*p;
581 {
582 	struct tun_softc *tp;
583 	int s, error = 0;
584 
585 	s = splnet();
586 	tp = tun_find_unit(dev);
587 
588 	/* interface was "destroyed" already */
589 	if (tp == NULL) {
590 		error = ENXIO;
591 		goto out_nolock;
592 	}
593 
594 	switch (cmd) {
595 	case TUNSDEBUG:
596 		tundebug = *(int *)data;
597 		break;
598 
599 	case TUNGDEBUG:
600 		*(int *)data = tundebug;
601 		break;
602 
603 	case TUNSIFMODE:
604 		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
605 		case IFF_POINTOPOINT:
606 		case IFF_BROADCAST:
607 			if (tp->tun_if.if_flags & IFF_UP) {
608 				error = EBUSY;
609 				goto out;
610 			}
611 			tp->tun_if.if_flags &=
612 				~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
613 			tp->tun_if.if_flags |= *(int *)data;
614 			break;
615 		default:
616 			error = EINVAL;
617 			goto out;
618 		}
619 		break;
620 
621 	case TUNSLMODE:
622 		if (*(int *)data)
623 			tp->tun_flags |= TUN_PREPADDR;
624 		else
625 			tp->tun_flags &= ~TUN_PREPADDR;
626 		break;
627 
628 	case FIONBIO:
629 		if (*(int *)data)
630 			tp->tun_flags |= TUN_NBIO;
631 		else
632 			tp->tun_flags &= ~TUN_NBIO;
633 		break;
634 
635 	case FIOASYNC:
636 		if (*(int *)data)
637 			tp->tun_flags |= TUN_ASYNC;
638 		else
639 			tp->tun_flags &= ~TUN_ASYNC;
640 		break;
641 
642 	case FIONREAD:
643 		if (tp->tun_if.if_snd.ifq_head)
644 			*(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
645 		else
646 			*(int *)data = 0;
647 		break;
648 
649 	case TIOCSPGRP:
650 	case FIOSETOWN:
651 		error = fsetown(p, &tp->tun_pgid, cmd, data);
652 		break;
653 
654 	case TIOCGPGRP:
655 	case FIOGETOWN:
656 		error = fgetown(p, tp->tun_pgid, cmd, data);
657 		break;
658 
659 	default:
660 		error = ENOTTY;
661 	}
662 
663 out:
664 	simple_unlock(&tp->tun_lock);
665 out_nolock:
666 	splx(s);
667 	return (error);
668 }
669 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * Blocks until a packet is queued unless TUN_NBIO is set, in which case
 * EWOULDBLOCK is returned.  Other errors: ENXIO if the interface does not
 * exist (or was replaced while sleeping), EHOSTDOWN if the tunnel is not
 * ready, EINTR if the sleep was interrupted, or whatever uiomove() reports.
 * Any part of the packet that does not fit in the caller's buffer is
 * dropped.
 */
int
tunread(dev, uio, ioflag)
	dev_t		dev;
	struct uio	*uio;
	int		ioflag;
{
	struct tun_softc *tp;
	struct ifnet	*ifp;
	struct mbuf	*m, *m0;
	int		error = 0, len, s, index;

	s = splnet();
	tp = tun_find_unit(dev);	/* returns tp locked */

	/* interface was "destroyed" already */
	if (tp == NULL) {
		error = ENXIO;
		goto out_nolock;
	}

	/* Remember which interface we started with; rechecked after sleeping. */
	index = tp->tun_if.if_index;
	ifp = &tp->tun_if;

	TUNDEBUG ("%s: read\n", ifp->if_xname);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
		error = EHOSTDOWN;
		goto out;
	}

	tp->tun_flags &= ~TUN_RWAIT;

	/* Loop until a packet has been taken off the send queue. */
	do {
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == 0) {
			if (tp->tun_flags & TUN_NBIO) {
				error = EWOULDBLOCK;
				goto out;
			}
			/* Ask tun_output()/tun_clone_destroy() to wake us. */
			tp->tun_flags |= TUN_RWAIT;
			/*
			 * The code below treats tun_lock as NOT held once
			 * ltsleep() returns (PNORELOCK): the failure path
			 * skips the unlock and the success path re-acquires
			 * the lock through tun_find_unit().
			 */
			if (ltsleep((caddr_t)tp, PZERO|PCATCH|PNORELOCK,
					"tunread", 0, &tp->tun_lock) != 0) {
				error = EINTR;
				goto out_nolock;
			} else {
				/*
				 * Maybe the interface was destroyed while
				 * we were sleeping, so let's ensure that
				 * we're looking at the same (valid) tun
				 * interface before looping.
				 */
				tp = tun_find_unit(dev);
				if (tp == NULL) {
					error = ENXIO;
					goto out_nolock;
				}
				if (tp->tun_if.if_index != index) {
					error = ENXIO;
					goto out;
				}
			}
		}
	} while (m0 == 0);

	/* The packet is ours now; copy it out without holding the lock. */
	simple_unlock(&tp->tun_lock);
	splx(s);

	/* Copy the mbuf chain */
	while (m0 && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		/* Free this mbuf and step to the next one in the chain. */
		MFREE(m0, m);
		m0 = m;
	}

	/* Whatever did not fit in the caller's buffer is discarded. */
	if (m0) {
		TUNDEBUG("Dropping mbuf\n");
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;

	return (error);

out:
	simple_unlock(&tp->tun_lock);
out_nolock:
	splx(s);
	return (error);
}
765 
766 /*
767  * the cdevsw write interface - an atomic write is a packet - or else!
768  */
769 int
770 tunwrite(dev, uio, ioflag)
771 	dev_t		dev;
772 	struct uio	*uio;
773 	int		ioflag;
774 {
775 	struct tun_softc *tp;
776 	struct ifnet	*ifp;
777 	struct mbuf	*top, **mp, *m;
778 	struct ifqueue	*ifq;
779 	struct sockaddr	dst;
780 	int		isr, error = 0, s, tlen, mlen;
781 
782 	s = splnet();
783 	tp = tun_find_unit(dev);
784 
785 	/* interface was "destroyed" already */
786 	if (tp == NULL) {
787 		error = ENXIO;
788 		goto out_nolock;
789 	}
790 
791 	/* Unlock until we've got the data */
792 	simple_unlock(&tp->tun_lock);
793 	splx(s);
794 
795 	ifp = &tp->tun_if;
796 
797 	TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
798 
799 	if (tp->tun_flags & TUN_PREPADDR) {
800 		if (uio->uio_resid < sizeof(dst)) {
801 			error = EIO;
802 			goto out0;
803 		}
804 		error = uiomove((caddr_t)&dst, sizeof(dst), uio);
805 		if (dst.sa_len > sizeof(dst)) {
806 			/* Duh.. */
807 			char discard;
808 			int n = dst.sa_len - sizeof(dst);
809 			while (n--)
810 				if ((error = uiomove(&discard, 1, uio)) != 0) {
811 					goto out0;
812 				}
813 		}
814 	} else {
815 #ifdef INET
816 		dst.sa_family = AF_INET;
817 #endif
818 	}
819 
820 	if (uio->uio_resid > TUNMTU) {
821 		TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
822 		    (unsigned long)uio->uio_resid);
823 		error = EIO;
824 		goto out0;
825 	}
826 
827 	switch (dst.sa_family) {
828 #ifdef INET
829 	case AF_INET:
830 		ifq = &ipintrq;
831 		isr = NETISR_IP;
832 		break;
833 #endif
834 	default:
835 		error = EAFNOSUPPORT;
836 		goto out0;
837 	}
838 
839 	tlen = uio->uio_resid;
840 
841 	/* get a header mbuf */
842 	MGETHDR(m, M_DONTWAIT, MT_DATA);
843 	if (m == NULL) {
844 		error = ENOBUFS;
845 		goto out0;
846 	}
847 	mlen = MHLEN;
848 
849 	top = NULL;
850 	mp = &top;
851 	while (error == 0 && uio->uio_resid > 0) {
852 		m->m_len = min(mlen, uio->uio_resid);
853 		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
854 		*mp = m;
855 		mp = &m->m_next;
856 		if (error == 0 && uio->uio_resid > 0) {
857 			MGET(m, M_DONTWAIT, MT_DATA);
858 			if (m == NULL) {
859 				error = ENOBUFS;
860 				break;
861 			}
862 			mlen = MLEN;
863 		}
864 	}
865 	if (error) {
866 		if (top != NULL)
867 			m_freem (top);
868 		ifp->if_ierrors++;
869 		goto out0;
870 	}
871 
872 	top->m_pkthdr.len = tlen;
873 	top->m_pkthdr.rcvif = ifp;
874 
875 #if NBPFILTER > 0
876 	if (ifp->if_bpf)
877 		bpf_mtap_af(ifp->if_bpf, AF_INET, top);
878 #endif
879 
880 	s = splnet();
881 	simple_lock(&tp->tun_lock);
882 	if ((tp->tun_flags & TUN_INITED) == 0) {
883 		/* Interface was destroyed */
884 		error = ENXIO;
885 		goto out;
886 	}
887 	if (IF_QFULL(ifq)) {
888 		IF_DROP(ifq);
889 		ifp->if_collisions++;
890 		m_freem(top);
891 		error = ENOBUFS;
892 		goto out;
893 	}
894 
895 	IF_ENQUEUE(ifq, top);
896 	ifp->if_ipackets++;
897 	ifp->if_ibytes += tlen;
898 	schednetisr(isr);
899 out:
900 	simple_unlock(&tp->tun_lock);
901 out_nolock:
902 	splx(s);
903 out0:
904 	return (error);
905 }
906 
#ifdef ALTQ
/*
 * if_start hook, used only with ALTQ or TBR.
 *
 * When the send queue is rate-limited, packets become available to the
 * reader some time after tun_output() queued them, so this routine is
 * what notifies sleeping readers, the SIGIO owner and select()ers.
 * It does nothing unless ALTQ or TBR is enabled on the queue.
 *
 * Should be called at splnet.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;

	if (!(ALTQ_IS_ENABLED(&ifp->if_snd) || TBR_IS_ENABLED(&ifp->if_snd)))
		return;

	simple_lock(&sc->tun_lock);

	if (IF_IS_EMPTY(&ifp->if_snd)) {
		/* Nothing ready yet, so nobody to notify. */
		simple_unlock(&sc->tun_lock);
		return;
	}

	if (sc->tun_flags & TUN_RWAIT) {
		sc->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)sc);
	}
	if (sc->tun_flags & TUN_ASYNC && sc->tun_pgid)
		fownsignal(sc->tun_pgid, SIGIO, POLL_OUT,
		    POLLOUT|POLLWRNORM, NULL);

	selwakeup(&sc->tun_rsel);

	simple_unlock(&sc->tun_lock);
}
#endif /* ALTQ */
940 /*
941  * tunpoll - the poll interface, this is only useful on reads
942  * really. The write detect always returns true, write never blocks
943  * anyway, it either accepts the packet or drops it.
944  */
945 int
946 tunpoll(dev, events, p)
947 	dev_t		dev;
948 	int		events;
949 	struct proc	*p;
950 {
951 	struct tun_softc *tp;
952 	struct ifnet	*ifp;
953 	int		s, revents = 0;
954 
955 	s = splnet();
956 	tp = tun_find_unit(dev);
957 
958 	/* interface was "destroyed" already */
959 	if (tp == NULL)
960 		goto out_nolock;
961 
962 	ifp = &tp->tun_if;
963 
964 	TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
965 
966 	if (events & (POLLIN | POLLRDNORM)) {
967 		if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
968 			TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
969 			    ifp->if_snd.ifq_len);
970 			revents |= events & (POLLIN | POLLRDNORM);
971 		} else {
972 			TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
973 			selrecord(p, &tp->tun_rsel);
974 		}
975 	}
976 
977 	if (events & (POLLOUT | POLLWRNORM))
978 		revents |= events & (POLLOUT | POLLWRNORM);
979 
980 	simple_unlock(&tp->tun_lock);
981 out_nolock:
982 	splx(s);
983 	return (revents);
984 }
985 
986 static void
987 filt_tunrdetach(struct knote *kn)
988 {
989 	struct tun_softc *tp = kn->kn_hook;
990 	int s;
991 
992 	s = splnet();
993 	SLIST_REMOVE(&tp->tun_rsel.sel_klist, kn, knote, kn_selnext);
994 	splx(s);
995 }
996 
997 static int
998 filt_tunread(struct knote *kn, long hint)
999 {
1000 	struct tun_softc *tp = kn->kn_hook;
1001 	struct ifnet *ifp = &tp->tun_if;
1002 	struct mbuf *m;
1003 	int s;
1004 
1005 	s = splnet();
1006 	IF_POLL(&ifp->if_snd, m);
1007 	if (m == NULL) {
1008 		splx(s);
1009 		return (0);
1010 	}
1011 
1012 	for (kn->kn_data = 0; m != NULL; m = m->m_next)
1013 		kn->kn_data += m->m_len;
1014 
1015 	splx(s);
1016 	return (1);
1017 }
1018 
/* Filter operations installed by tunkqfilter() for EVFILT_READ. */
static const struct filterops tunread_filtops =
	{ 1, NULL, filt_tunrdetach, filt_tunread };

/*
 * Filter operations installed by tunkqfilter() for EVFILT_WRITE; the event
 * routine is filt_seltrue, the detach routine is shared with the read side.
 */
static const struct filterops tun_seltrue_filtops =
	{ 1, NULL, filt_tunrdetach, filt_seltrue };
1024 
1025 int
1026 tunkqfilter(dev_t dev, struct knote *kn)
1027 {
1028 	struct tun_softc *tp;
1029 	struct klist *klist;
1030 	int rv = 0, s;
1031 
1032 	s = splnet();
1033 	tp = tun_find_unit(dev);
1034 	if (tp == NULL)
1035 		goto out_nolock;
1036 
1037 	switch (kn->kn_filter) {
1038 	case EVFILT_READ:
1039 		klist = &tp->tun_rsel.sel_klist;
1040 		kn->kn_fop = &tunread_filtops;
1041 		break;
1042 
1043 	case EVFILT_WRITE:
1044 		klist = &tp->tun_rsel.sel_klist;
1045 		kn->kn_fop = &tun_seltrue_filtops;
1046 		break;
1047 
1048 	default:
1049 		rv = 1;
1050 		goto out;
1051 	}
1052 
1053 	kn->kn_hook = tp;
1054 
1055 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1056 
1057 out:
1058 	simple_unlock(&tp->tun_lock);
1059 out_nolock:
1060 	splx(s);
1061 	return (rv);
1062 }
1063