xref: /netbsd-src/sys/net/if_tun.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: if_tun.c,v 1.72 2004/08/19 20:58:24 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5  * Nottingham University 1987.
6  *
7  * This source may be freely distributed, however I would be interested
8  * in any changes that are made.
9  *
10  * This driver takes packets off the IP i/f and hands them up to a
11  * user process to have its wicked way with. This driver has its
12  * roots in a similar driver written by Phil Cockcroft (formerly) at
13  * UCL. This driver is based much more on read/write/poll mode of
14  * operation though.
15  */
16 
17 #include <sys/cdefs.h>
18 __KERNEL_RCSID(0, "$NetBSD: if_tun.c,v 1.72 2004/08/19 20:58:24 christos Exp $");
19 
20 #include "tun.h"
21 
22 #include "opt_inet.h"
23 #include "opt_ns.h"
24 
25 #include <sys/param.h>
26 #include <sys/proc.h>
27 #include <sys/systm.h>
28 #include <sys/mbuf.h>
29 #include <sys/buf.h>
30 #include <sys/protosw.h>
31 #include <sys/socket.h>
32 #include <sys/ioctl.h>
33 #include <sys/errno.h>
34 #include <sys/syslog.h>
35 #include <sys/select.h>
36 #include <sys/poll.h>
37 #include <sys/file.h>
38 #include <sys/signalvar.h>
39 #include <sys/conf.h>
40 
41 #include <machine/cpu.h>
42 
43 #include <net/if.h>
44 #include <net/if_types.h>
45 #include <net/netisr.h>
46 #include <net/route.h>
47 
48 
49 #ifdef INET
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/in_var.h>
53 #include <netinet/ip.h>
54 #include <netinet/if_inarp.h>
55 #endif
56 
57 #ifdef NS
58 #include <netns/ns.h>
59 #include <netns/ns_if.h>
60 #endif
61 
62 #include "bpfilter.h"
63 #if NBPFILTER > 0
64 #include <sys/time.h>
65 #include <net/bpf.h>
66 #endif
67 
68 #include <net/if_tun.h>
69 
70 #define TUNDEBUG	if (tundebug) printf
71 int	tundebug = 0;
72 
73 extern int ifqmaxlen;
74 void	tunattach __P((int));
75 LIST_HEAD(, tun_softc) tun_softc_list;
76 LIST_HEAD(, tun_softc) tunz_softc_list;
77 static struct simplelock tun_softc_lock;
78 
79 int	tun_ioctl __P((struct ifnet *, u_long, caddr_t));
80 int	tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
81 		       struct rtentry *rt));
82 int	tun_clone_create __P((struct if_clone *, int));
83 void	tun_clone_destroy __P((struct ifnet *));
84 
85 struct if_clone tun_cloner =
86     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
87 
88 static void tunattach0 __P((struct tun_softc *));
89 static void tuninit __P((struct tun_softc *));
90 #ifdef ALTQ
91 static void tunstart __P((struct ifnet *));
92 #endif
93 static struct tun_softc *tun_find_unit __P((dev_t));
94 static struct tun_softc *tun_find_zunit __P((int));
95 
96 dev_type_open(tunopen);
97 dev_type_close(tunclose);
98 dev_type_read(tunread);
99 dev_type_write(tunwrite);
100 dev_type_ioctl(tunioctl);
101 dev_type_poll(tunpoll);
102 dev_type_kqfilter(tunkqfilter);
103 
104 const struct cdevsw tun_cdevsw = {
105 	tunopen, tunclose, tunread, tunwrite, tunioctl,
106 	nostop, notty, tunpoll, nommap, tunkqfilter,
107 };
108 
109 void
110 tunattach(unused)
111 	int unused;
112 {
113 
114 	simple_lock_init(&tun_softc_lock);
115 	LIST_INIT(&tun_softc_list);
116 	LIST_INIT(&tunz_softc_list);
117 	if_clone_attach(&tun_cloner);
118 }
119 
120 /*
121  * Find driver instance from dev_t.
122  * Call at splnet().
123  * Returns with tp locked (if found).
124  */
125 static struct tun_softc *
126 tun_find_unit(dev)
127 	dev_t dev;
128 {
129 	struct tun_softc *tp;
130 	int unit = minor(dev);
131 
132 	simple_lock(&tun_softc_lock);
133 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
134 		if (unit == tp->tun_unit)
135 			break;
136 	if (tp)
137 		simple_lock(&tp->tun_lock);
138 	simple_unlock(&tun_softc_lock);
139 
140 	return (tp);
141 }
142 
143 /*
144  * Find zombie driver instance by unit number.
145  * Call at splnet().
146  * Remove tp from list and return it unlocked (if found).
147  */
148 static struct tun_softc *
149 tun_find_zunit(unit)
150 	int unit;
151 {
152 	struct tun_softc *tp;
153 
154 	simple_lock(&tun_softc_lock);
155 	LIST_FOREACH(tp, &tunz_softc_list, tun_list)
156 		if (unit == tp->tun_unit)
157 			break;
158 	if (tp)
159 		LIST_REMOVE(tp, tun_list);
160 	simple_unlock(&tun_softc_lock);
161 #ifdef DIAGNOSTIC
162 	if (tp != NULL && (tp->tun_flags & (TUN_INITED|TUN_OPEN)) != TUN_OPEN)
163 		printf("tun%d: inconsistent flags: %x\n", unit, tp->tun_flags);
164 #endif
165 
166 	return (tp);
167 }
168 
169 int
170 tun_clone_create(ifc, unit)
171 	struct if_clone *ifc;
172 	int unit;
173 {
174 	struct tun_softc *tp;
175 
176 	if ((tp = tun_find_zunit(unit)) == NULL) {
177 		/* Allocate a new instance */
178 		tp = malloc(sizeof(struct tun_softc), M_DEVBUF, M_WAITOK);
179 		(void)memset(tp, 0, sizeof(struct tun_softc));
180 
181 		tp->tun_unit = unit;
182 		simple_lock_init(&tp->tun_lock);
183 	} else {
184 		/* Revive tunnel instance; clear ifp part */
185 		(void)memset(&tp->tun_if, 0, sizeof(struct ifnet));
186 	}
187 
188 	(void)snprintf(tp->tun_if.if_xname, sizeof(tp->tun_if.if_xname),
189 			"%s%d", ifc->ifc_name, unit);
190 	tunattach0(tp);
191 	tp->tun_flags |= TUN_INITED;
192 
193 	simple_lock(&tun_softc_lock);
194 	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
195 	simple_unlock(&tun_softc_lock);
196 
197 	return (0);
198 }
199 
200 void
201 tunattach0(tp)
202 	struct tun_softc *tp;
203 {
204 	struct ifnet *ifp;
205 
206 	ifp = &tp->tun_if;
207 	ifp->if_softc = tp;
208 	ifp->if_mtu = TUNMTU;
209 	ifp->if_ioctl = tun_ioctl;
210 	ifp->if_output = tun_output;
211 #ifdef ALTQ
212 	ifp->if_start = tunstart;
213 #endif
214 	ifp->if_flags = IFF_POINTOPOINT;
215 	ifp->if_type = IFT_TUNNEL;
216 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
217 	ifp->if_collisions = 0;
218 	ifp->if_ierrors = 0;
219 	ifp->if_oerrors = 0;
220 	ifp->if_ipackets = 0;
221 	ifp->if_opackets = 0;
222 	ifp->if_ibytes   = 0;
223 	ifp->if_obytes   = 0;
224 	ifp->if_dlt = DLT_NULL;
225 	IFQ_SET_READY(&ifp->if_snd);
226 	if_attach(ifp);
227 	if_alloc_sadl(ifp);
228 #if NBPFILTER > 0
229 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
230 #endif
231 }
232 
233 void
234 tun_clone_destroy(ifp)
235 	struct ifnet *ifp;
236 {
237 	struct tun_softc *tp = (void *)ifp;
238 	int s, zombie = 0;
239 
240 	s = splnet();
241 	simple_lock(&tun_softc_lock);
242 	simple_lock(&tp->tun_lock);
243 	LIST_REMOVE(tp, tun_list);
244 	if (tp->tun_flags & TUN_OPEN) {
245 		/* Hang on to storage until last close */
246 		zombie = 1;
247 		tp->tun_flags &= ~TUN_INITED;
248 		LIST_INSERT_HEAD(&tunz_softc_list, tp, tun_list);
249 	}
250 	simple_unlock(&tun_softc_lock);
251 
252 	IF_PURGE(&ifp->if_snd);
253 	ifp->if_flags &= ~IFF_RUNNING;
254 
255 	if (tp->tun_flags & TUN_RWAIT) {
256 		tp->tun_flags &= ~TUN_RWAIT;
257 		wakeup((caddr_t)tp);
258 	}
259 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
260 		fownsignal(tp->tun_pgid, SIGIO, POLL_HUP, 0, NULL);
261 
262 	selwakeup(&tp->tun_rsel);
263 
264 	simple_unlock(&tp->tun_lock);
265 	splx(s);
266 
267 #if NBPFILTER > 0
268 	bpfdetach(ifp);
269 #endif
270 	if_detach(ifp);
271 
272 	if (!zombie)
273 		free(tp, M_DEVBUF);
274 }
275 
276 /*
277  * tunnel open - must be superuser & the device must be
278  * configured in
279  */
280 int
281 tunopen(dev, flag, mode, p)
282 	dev_t	dev;
283 	int	flag, mode;
284 	struct proc *p;
285 {
286 	struct ifnet	*ifp;
287 	struct tun_softc *tp;
288 	int	s, error;
289 
290 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
291 		return (error);
292 
293 	if (NTUN < 1)
294 		return (ENXIO);
295 
296 	s = splnet();
297 	tp = tun_find_unit(dev);
298 
299 	if (tp == NULL) {
300 		(void)tun_clone_create(&tun_cloner, minor(dev));
301 		tp = tun_find_unit(dev);
302 		if (tp == NULL) {
303 			error = ENXIO;
304 			goto out_nolock;
305 		}
306 	}
307 
308 	if (tp->tun_flags & TUN_OPEN) {
309 		error = EBUSY;
310 		goto out;
311 	}
312 
313 	ifp = &tp->tun_if;
314 	tp->tun_flags |= TUN_OPEN;
315 	TUNDEBUG("%s: open\n", ifp->if_xname);
316 out:
317 	simple_unlock(&tp->tun_lock);
318 out_nolock:
319 	splx(s);
320 	return (error);
321 }
322 
323 /*
324  * tunclose - close the device - mark i/f down & delete
325  * routing info
326  */
327 int
328 tunclose(dev, flag, mode, p)
329 	dev_t	dev;
330 	int	flag;
331 	int	mode;
332 	struct proc *p;
333 {
334 	int	s;
335 	struct tun_softc *tp;
336 	struct ifnet	*ifp;
337 
338 	s = splnet();
339 	if ((tp = tun_find_zunit(minor(dev))) != NULL) {
340 		/* interface was "destroyed" before the close */
341 		free(tp, M_DEVBUF);
342 		goto out_nolock;
343 	}
344 
345 	if ((tp = tun_find_unit(dev)) == NULL)
346 		goto out_nolock;
347 
348 	ifp = &tp->tun_if;
349 
350 	tp->tun_flags &= ~TUN_OPEN;
351 
352 	/*
353 	 * junk all pending output
354 	 */
355 	IFQ_PURGE(&ifp->if_snd);
356 
357 	if (ifp->if_flags & IFF_UP) {
358 		if_down(ifp);
359 		if (ifp->if_flags & IFF_RUNNING) {
360 			/* find internet addresses and delete routes */
361 			struct ifaddr *ifa;
362 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
363 #ifdef INET
364 				if (ifa->ifa_addr->sa_family == AF_INET) {
365 					rtinit(ifa, (int)RTM_DELETE,
366 					       tp->tun_flags & TUN_DSTADDR
367 							? RTF_HOST
368 							: 0);
369 				}
370 #endif
371 			}
372 		}
373 	}
374 	tp->tun_pgid = 0;
375 	selnotify(&tp->tun_rsel, 0);
376 
377 	TUNDEBUG ("%s: closed\n", ifp->if_xname);
378 	simple_unlock(&tp->tun_lock);
379 out_nolock:
380 	splx(s);
381 	return (0);
382 }
383 
384 /*
385  * Call at splnet() with tp locked.
386  */
387 static void
388 tuninit(tp)
389 	struct tun_softc *tp;
390 {
391 	struct ifnet	*ifp = &tp->tun_if;
392 	struct ifaddr	*ifa;
393 
394 	TUNDEBUG("%s: tuninit\n", ifp->if_xname);
395 
396 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
397 
398 	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
399 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
400 #ifdef INET
401 		if (ifa->ifa_addr->sa_family == AF_INET) {
402 			struct sockaddr_in *sin;
403 
404 			sin = satosin(ifa->ifa_addr);
405 			if (sin && sin->sin_addr.s_addr)
406 				tp->tun_flags |= TUN_IASET;
407 
408 			if (ifp->if_flags & IFF_POINTOPOINT) {
409 				sin = satosin(ifa->ifa_dstaddr);
410 				if (sin && sin->sin_addr.s_addr)
411 					tp->tun_flags |= TUN_DSTADDR;
412 			}
413 		}
414 #endif
415 	}
416 
417 	return;
418 }
419 
420 /*
421  * Process an ioctl request.
422  */
423 int
424 tun_ioctl(ifp, cmd, data)
425 	struct ifnet *ifp;
426 	u_long cmd;
427 	caddr_t	data;
428 {
429 	int		error = 0, s;
430 	struct tun_softc *tp = (struct tun_softc *)(ifp->if_softc);
431 
432 	s = splnet();
433 	simple_lock(&tp->tun_lock);
434 
435 	switch (cmd) {
436 	case SIOCSIFADDR:
437 		tuninit(tp);
438 		TUNDEBUG("%s: address set\n", ifp->if_xname);
439 		break;
440 	case SIOCSIFDSTADDR:
441 		tuninit(tp);
442 		TUNDEBUG("%s: destination address set\n", ifp->if_xname);
443 		break;
444 	case SIOCSIFBRDADDR:
445 		TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
446 		break;
447 	case SIOCSIFMTU: {
448 		struct ifreq *ifr = (struct ifreq *) data;
449 		if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
450 		    error = EINVAL;
451 		    break;
452 		}
453 		TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
454 		ifp->if_mtu = ifr->ifr_mtu;
455 		break;
456 	}
457 	case SIOCADDMULTI:
458 	case SIOCDELMULTI: {
459 		struct ifreq *ifr = (struct ifreq *) data;
460 		if (ifr == 0) {
461 	        	error = EAFNOSUPPORT;           /* XXX */
462 			break;
463 		}
464 		switch (ifr->ifr_addr.sa_family) {
465 #ifdef INET
466 		case AF_INET:
467 			break;
468 #endif
469 		default:
470 			error = EAFNOSUPPORT;
471 			break;
472 		}
473 		break;
474 	}
475 	case SIOCSIFFLAGS:
476 		break;
477 	default:
478 		error = EINVAL;
479 	}
480 
481 	simple_unlock(&tp->tun_lock);
482 	splx(s);
483 	return (error);
484 }
485 
486 /*
487  * tun_output - queue packets from higher level ready to put out.
488  */
489 int
490 tun_output(ifp, m0, dst, rt)
491 	struct ifnet   *ifp;
492 	struct mbuf    *m0;
493 	struct sockaddr *dst;
494 	struct rtentry *rt;
495 {
496 	struct tun_softc *tp = ifp->if_softc;
497 #ifdef INET
498 	int		s;
499 	int		error;
500 #endif
501 	int		mlen;
502 	ALTQ_DECL(struct altq_pktattr pktattr;)
503 
504 	s = splnet();
505 	simple_lock(&tp->tun_lock);
506 	TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
507 
508 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
509 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
510 			  tp->tun_flags);
511 		m_freem (m0);
512 		error = EHOSTDOWN;
513 		goto out;
514 	}
515 
516 	/*
517 	 * if the queueing discipline needs packet classification,
518 	 * do it before prepending link headers.
519 	 */
520 	IFQ_CLASSIFY(&ifp->if_snd, m0, dst->sa_family, &pktattr);
521 
522 #if NBPFILTER > 0
523 	if (ifp->if_bpf)
524 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m0);
525 #endif
526 
527 	switch(dst->sa_family) {
528 #ifdef INET
529 	case AF_INET:
530 		if (tp->tun_flags & TUN_PREPADDR) {
531 			/* Simple link-layer header */
532 			M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
533 			if (m0 == NULL) {
534 				IF_DROP(&ifp->if_snd);
535 				error = ENOBUFS;
536 				goto out;
537 			}
538 			bcopy(dst, mtod(m0, char *), dst->sa_len);
539 		}
540 		/* FALLTHROUGH */
541 	case AF_UNSPEC:
542 		IFQ_ENQUEUE(&ifp->if_snd, m0, &pktattr, error);
543 		if (error) {
544 			ifp->if_collisions++;
545 			error = EAFNOSUPPORT;
546 			goto out;
547 		}
548 		mlen = m0->m_pkthdr.len;
549 		ifp->if_opackets++;
550 		ifp->if_obytes += mlen;
551 		break;
552 #endif
553 	default:
554 		m_freem(m0);
555 		error = EAFNOSUPPORT;
556 		goto out;
557 	}
558 
559 	if (tp->tun_flags & TUN_RWAIT) {
560 		tp->tun_flags &= ~TUN_RWAIT;
561 		wakeup((caddr_t)tp);
562 	}
563 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
564 		fownsignal(tp->tun_pgid, SIGIO, POLL_IN, POLLIN|POLLRDNORM,
565 		    NULL);
566 
567 	selnotify(&tp->tun_rsel, 0);
568 out:
569 	simple_unlock(&tp->tun_lock);
570 	splx(s);
571 	return (0);
572 }
573 
574 /*
575  * the cdevsw interface is now pretty minimal.
576  */
577 int
578 tunioctl(dev, cmd, data, flag, p)
579 	dev_t		dev;
580 	u_long		cmd;
581 	caddr_t		data;
582 	int		flag;
583 	struct proc	*p;
584 {
585 	struct tun_softc *tp;
586 	int s, error = 0;
587 
588 	s = splnet();
589 	tp = tun_find_unit(dev);
590 
591 	/* interface was "destroyed" already */
592 	if (tp == NULL) {
593 		error = ENXIO;
594 		goto out_nolock;
595 	}
596 
597 	switch (cmd) {
598 	case TUNSDEBUG:
599 		tundebug = *(int *)data;
600 		break;
601 
602 	case TUNGDEBUG:
603 		*(int *)data = tundebug;
604 		break;
605 
606 	case TUNSIFMODE:
607 		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
608 		case IFF_POINTOPOINT:
609 		case IFF_BROADCAST:
610 			if (tp->tun_if.if_flags & IFF_UP) {
611 				error = EBUSY;
612 				goto out;
613 			}
614 			tp->tun_if.if_flags &=
615 				~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
616 			tp->tun_if.if_flags |= *(int *)data;
617 			break;
618 		default:
619 			error = EINVAL;
620 			goto out;
621 		}
622 		break;
623 
624 	case TUNSLMODE:
625 		if (*(int *)data)
626 			tp->tun_flags |= TUN_PREPADDR;
627 		else
628 			tp->tun_flags &= ~TUN_PREPADDR;
629 		break;
630 
631 	case FIONBIO:
632 		if (*(int *)data)
633 			tp->tun_flags |= TUN_NBIO;
634 		else
635 			tp->tun_flags &= ~TUN_NBIO;
636 		break;
637 
638 	case FIOASYNC:
639 		if (*(int *)data)
640 			tp->tun_flags |= TUN_ASYNC;
641 		else
642 			tp->tun_flags &= ~TUN_ASYNC;
643 		break;
644 
645 	case FIONREAD:
646 		if (tp->tun_if.if_snd.ifq_head)
647 			*(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
648 		else
649 			*(int *)data = 0;
650 		break;
651 
652 	case TIOCSPGRP:
653 	case FIOSETOWN:
654 		error = fsetown(p, &tp->tun_pgid, cmd, data);
655 		break;
656 
657 	case TIOCGPGRP:
658 	case FIOGETOWN:
659 		error = fgetown(p, tp->tun_pgid, cmd, data);
660 		break;
661 
662 	default:
663 		error = ENOTTY;
664 	}
665 
666 out:
667 	simple_unlock(&tp->tun_lock);
668 out_nolock:
669 	splx(s);
670 	return (error);
671 }
672 
673 /*
674  * The cdevsw read interface - reads a packet at a time, or at
675  * least as much of a packet as can be read.
676  */
677 int
678 tunread(dev, uio, ioflag)
679 	dev_t		dev;
680 	struct uio	*uio;
681 	int		ioflag;
682 {
683 	struct tun_softc *tp;
684 	struct ifnet	*ifp;
685 	struct mbuf	*m, *m0;
686 	int		error = 0, len, s, index;
687 
688 	s = splnet();
689 	tp = tun_find_unit(dev);
690 
691 	/* interface was "destroyed" already */
692 	if (tp == NULL) {
693 		error = ENXIO;
694 		goto out_nolock;
695 	}
696 
697 	index = tp->tun_if.if_index;
698 	ifp = &tp->tun_if;
699 
700 	TUNDEBUG ("%s: read\n", ifp->if_xname);
701 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
702 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
703 		error = EHOSTDOWN;
704 		goto out;
705 	}
706 
707 	tp->tun_flags &= ~TUN_RWAIT;
708 
709 	do {
710 		IFQ_DEQUEUE(&ifp->if_snd, m0);
711 		if (m0 == 0) {
712 			if (tp->tun_flags & TUN_NBIO) {
713 				error = EWOULDBLOCK;
714 				goto out;
715 			}
716 			tp->tun_flags |= TUN_RWAIT;
717 			if (ltsleep((caddr_t)tp, PZERO|PCATCH|PNORELOCK,
718 					"tunread", 0, &tp->tun_lock) != 0) {
719 				error = EINTR;
720 				goto out_nolock;
721 			} else {
722 				/*
723 				 * Maybe the interface was destroyed while
724 				 * we were sleeping, so let's ensure that
725 				 * we're looking at the same (valid) tun
726 				 * interface before looping.
727 				 */
728 				tp = tun_find_unit(dev);
729 				if (tp == NULL) {
730 					error = ENXIO;
731 					goto out_nolock;
732 				}
733 				if (tp->tun_if.if_index != index) {
734 					error = ENXIO;
735 					goto out;
736 				}
737 			}
738 		}
739 	} while (m0 == 0);
740 
741 	simple_unlock(&tp->tun_lock);
742 	splx(s);
743 
744 	/* Copy the mbuf chain */
745 	while (m0 && uio->uio_resid > 0 && error == 0) {
746 		len = min(uio->uio_resid, m0->m_len);
747 		if (len != 0)
748 			error = uiomove(mtod(m0, caddr_t), len, uio);
749 		MFREE(m0, m);
750 		m0 = m;
751 	}
752 
753 	if (m0) {
754 		TUNDEBUG("Dropping mbuf\n");
755 		m_freem(m0);
756 	}
757 	if (error)
758 		ifp->if_ierrors++;
759 
760 	return (error);
761 
762 out:
763 	simple_unlock(&tp->tun_lock);
764 out_nolock:
765 	splx(s);
766 	return (error);
767 }
768 
769 /*
770  * the cdevsw write interface - an atomic write is a packet - or else!
771  */
772 int
773 tunwrite(dev, uio, ioflag)
774 	dev_t		dev;
775 	struct uio	*uio;
776 	int		ioflag;
777 {
778 	struct tun_softc *tp;
779 	struct ifnet	*ifp;
780 	struct mbuf	*top, **mp, *m;
781 	struct ifqueue	*ifq;
782 	struct sockaddr	dst;
783 	int		isr, error = 0, s, tlen, mlen;
784 
785 	s = splnet();
786 	tp = tun_find_unit(dev);
787 
788 	/* interface was "destroyed" already */
789 	if (tp == NULL) {
790 		error = ENXIO;
791 		goto out_nolock;
792 	}
793 
794 	/* Unlock until we've got the data */
795 	simple_unlock(&tp->tun_lock);
796 	splx(s);
797 
798 	ifp = &tp->tun_if;
799 
800 	TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
801 
802 	if (tp->tun_flags & TUN_PREPADDR) {
803 		if (uio->uio_resid < sizeof(dst)) {
804 			error = EIO;
805 			goto out0;
806 		}
807 		error = uiomove((caddr_t)&dst, sizeof(dst), uio);
808 		if (dst.sa_len > sizeof(dst)) {
809 			/* Duh.. */
810 			char discard;
811 			int n = dst.sa_len - sizeof(dst);
812 			while (n--)
813 				if ((error = uiomove(&discard, 1, uio)) != 0) {
814 					goto out0;
815 				}
816 		}
817 	} else {
818 #ifdef INET
819 		dst.sa_family = AF_INET;
820 #endif
821 	}
822 
823 	if (uio->uio_resid > TUNMTU) {
824 		TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
825 		    (unsigned long)uio->uio_resid);
826 		error = EIO;
827 		goto out0;
828 	}
829 
830 	switch (dst.sa_family) {
831 #ifdef INET
832 	case AF_INET:
833 		ifq = &ipintrq;
834 		isr = NETISR_IP;
835 		break;
836 #endif
837 	default:
838 		error = EAFNOSUPPORT;
839 		goto out0;
840 	}
841 
842 	tlen = uio->uio_resid;
843 
844 	/* get a header mbuf */
845 	MGETHDR(m, M_DONTWAIT, MT_DATA);
846 	if (m == NULL) {
847 		error = ENOBUFS;
848 		goto out0;
849 	}
850 	mlen = MHLEN;
851 
852 	top = NULL;
853 	mp = &top;
854 	while (error == 0 && uio->uio_resid > 0) {
855 		m->m_len = min(mlen, uio->uio_resid);
856 		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
857 		*mp = m;
858 		mp = &m->m_next;
859 		if (error == 0 && uio->uio_resid > 0) {
860 			MGET(m, M_DONTWAIT, MT_DATA);
861 			if (m == NULL) {
862 				error = ENOBUFS;
863 				break;
864 			}
865 			mlen = MLEN;
866 		}
867 	}
868 	if (error) {
869 		if (top != NULL)
870 			m_freem (top);
871 		ifp->if_ierrors++;
872 		goto out0;
873 	}
874 
875 	top->m_pkthdr.len = tlen;
876 	top->m_pkthdr.rcvif = ifp;
877 
878 #if NBPFILTER > 0
879 	if (ifp->if_bpf)
880 		bpf_mtap_af(ifp->if_bpf, AF_INET, top);
881 #endif
882 
883 	s = splnet();
884 	simple_lock(&tp->tun_lock);
885 	if ((tp->tun_flags & TUN_INITED) == 0) {
886 		/* Interface was destroyed */
887 		error = ENXIO;
888 		goto out;
889 	}
890 	if (IF_QFULL(ifq)) {
891 		IF_DROP(ifq);
892 		ifp->if_collisions++;
893 		m_freem(top);
894 		error = ENOBUFS;
895 		goto out;
896 	}
897 
898 	IF_ENQUEUE(ifq, top);
899 	ifp->if_ipackets++;
900 	ifp->if_ibytes += tlen;
901 	schednetisr(isr);
902 out:
903 	simple_unlock(&tp->tun_lock);
904 out_nolock:
905 	splx(s);
906 out0:
907 	return (error);
908 }
909 
#ifdef ALTQ
/*
 * Start packet transmission on the interface.
 *
 * When the interface queue is rate-limited by ALTQ or TBR, if_start
 * is needed to drain packets from the queue in order to notify
 * readers when outgoing packets become ready.  Nothing is done unless
 * ALTQ or TBR is enabled on the send queue.
 *
 * Should be called at splnet.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;

	if (!(ALTQ_IS_ENABLED(&ifp->if_snd) || TBR_IS_ENABLED(&ifp->if_snd)))
		return;

	simple_lock(&sc->tun_lock);
	if (IF_IS_EMPTY(&ifp->if_snd)) {
		/* Nothing queued, nobody to notify. */
		simple_unlock(&sc->tun_lock);
		return;
	}

	if ((sc->tun_flags & TUN_RWAIT) != 0) {
		sc->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)sc);
	}
	if ((sc->tun_flags & TUN_ASYNC) != 0 && sc->tun_pgid != 0)
		fownsignal(sc->tun_pgid, SIGIO, POLL_OUT,
		    POLLOUT|POLLWRNORM, NULL);

	selwakeup(&sc->tun_rsel);
	simple_unlock(&sc->tun_lock);
}
#endif /* ALTQ */
943 /*
944  * tunpoll - the poll interface, this is only useful on reads
945  * really. The write detect always returns true, write never blocks
946  * anyway, it either accepts the packet or drops it.
947  */
948 int
949 tunpoll(dev, events, p)
950 	dev_t		dev;
951 	int		events;
952 	struct proc	*p;
953 {
954 	struct tun_softc *tp;
955 	struct ifnet	*ifp;
956 	int		s, revents = 0;
957 
958 	s = splnet();
959 	tp = tun_find_unit(dev);
960 
961 	/* interface was "destroyed" already */
962 	if (tp == NULL)
963 		goto out_nolock;
964 
965 	ifp = &tp->tun_if;
966 
967 	TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
968 
969 	if (events & (POLLIN | POLLRDNORM)) {
970 		if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
971 			TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
972 			    ifp->if_snd.ifq_len);
973 			revents |= events & (POLLIN | POLLRDNORM);
974 		} else {
975 			TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
976 			selrecord(p, &tp->tun_rsel);
977 		}
978 	}
979 
980 	if (events & (POLLOUT | POLLWRNORM))
981 		revents |= events & (POLLOUT | POLLWRNORM);
982 
983 	simple_unlock(&tp->tun_lock);
984 out_nolock:
985 	splx(s);
986 	return (revents);
987 }
988 
989 static void
990 filt_tunrdetach(struct knote *kn)
991 {
992 	struct tun_softc *tp = kn->kn_hook;
993 	int s;
994 
995 	s = splnet();
996 	SLIST_REMOVE(&tp->tun_rsel.sel_klist, kn, knote, kn_selnext);
997 	splx(s);
998 }
999 
1000 static int
1001 filt_tunread(struct knote *kn, long hint)
1002 {
1003 	struct tun_softc *tp = kn->kn_hook;
1004 	struct ifnet *ifp = &tp->tun_if;
1005 	struct mbuf *m;
1006 	int s;
1007 
1008 	s = splnet();
1009 	IF_POLL(&ifp->if_snd, m);
1010 	if (m == NULL) {
1011 		splx(s);
1012 		return (0);
1013 	}
1014 
1015 	for (kn->kn_data = 0; m != NULL; m = m->m_next)
1016 		kn->kn_data += m->m_len;
1017 
1018 	splx(s);
1019 	return (1);
1020 }
1021 
1022 static const struct filterops tunread_filtops =
1023 	{ 1, NULL, filt_tunrdetach, filt_tunread };
1024 
1025 static const struct filterops tun_seltrue_filtops =
1026 	{ 1, NULL, filt_tunrdetach, filt_seltrue };
1027 
1028 int
1029 tunkqfilter(dev_t dev, struct knote *kn)
1030 {
1031 	struct tun_softc *tp;
1032 	struct klist *klist;
1033 	int rv = 0, s;
1034 
1035 	s = splnet();
1036 	tp = tun_find_unit(dev);
1037 	if (tp == NULL)
1038 		goto out_nolock;
1039 
1040 	switch (kn->kn_filter) {
1041 	case EVFILT_READ:
1042 		klist = &tp->tun_rsel.sel_klist;
1043 		kn->kn_fop = &tunread_filtops;
1044 		break;
1045 
1046 	case EVFILT_WRITE:
1047 		klist = &tp->tun_rsel.sel_klist;
1048 		kn->kn_fop = &tun_seltrue_filtops;
1049 		break;
1050 
1051 	default:
1052 		rv = 1;
1053 		goto out;
1054 	}
1055 
1056 	kn->kn_hook = tp;
1057 
1058 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1059 
1060 out:
1061 	simple_unlock(&tp->tun_lock);
1062 out_nolock:
1063 	splx(s);
1064 	return (rv);
1065 }
1066