xref: /openbsd-src/sys/net/if_tun.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: if_tun.c,v 1.227 2020/10/04 06:59:16 anton Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/sigio.h>
47 #include <sys/socket.h>
48 #include <sys/ioctl.h>
49 #include <sys/errno.h>
50 #include <sys/syslog.h>
51 #include <sys/selinfo.h>
52 #include <sys/fcntl.h>
53 #include <sys/time.h>
54 #include <sys/device.h>
55 #include <sys/vnode.h>
56 #include <sys/signalvar.h>
57 #include <sys/poll.h>
58 #include <sys/conf.h>
59 #include <sys/smr.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/rtable.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 
69 #include "bpfilter.h"
70 #if NBPFILTER > 0
71 #include <net/bpf.h>
72 #endif
73 
74 #ifdef MPLS
75 #include <netmpls/mpls.h>
76 #endif /* MPLS */
77 
78 #include <net/if_tun.h>
79 
/*
 * Per-interface state shared by the network side (ifnet) and the
 * character-device side (read/write/poll/kqueue) of a tun/tap instance.
 */
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct selinfo		sc_rsel;	/* read select */
	struct selinfo		sc_wsel;	/* write select (not used) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
/* sc_flags bit private to this file, above the public TUN_* flags */
#define TUN_DEAD			(1 << 16)

	dev_t			sc_dev;		/* non-zero while device is open */
	struct refcnt		sc_refs;	/* held across device entrypoints */
	unsigned int		sc_reading;	/* reader sleeping on if_snd */
};
96 
/* Debug printfs are compiled in only when TUN_DEBUG is defined above. */
#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
106 
107 void	tunattach(int);
108 
109 int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
110 int	tun_dev_close(dev_t, struct proc *);
111 int	tun_dev_ioctl(dev_t, u_long, void *);
112 int	tun_dev_read(dev_t, struct uio *, int);
113 int	tun_dev_write(dev_t, struct uio *, int, int);
114 int	tun_dev_poll(dev_t, int, struct proc *);
115 int	tun_dev_kqfilter(dev_t, struct knote *);
116 
117 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
118 void	tun_input(struct ifnet *, struct mbuf *);
119 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
120 	    struct rtentry *);
121 int	tun_enqueue(struct ifnet *, struct mbuf *);
122 int	tun_clone_create(struct if_clone *, int);
123 int	tap_clone_create(struct if_clone *, int);
124 int	tun_create(struct if_clone *, int, int);
125 int	tun_clone_destroy(struct ifnet *);
126 void	tun_wakeup(struct tun_softc *);
127 int	tun_init(struct tun_softc *);
128 void	tun_start(struct ifnet *);
129 int	filt_tunread(struct knote *, long);
130 int	filt_tunwrite(struct knote *, long);
131 void	filt_tunrdetach(struct knote *);
132 void	filt_tunwdetach(struct knote *);
133 void	tun_link_state(struct tun_softc *, int);
134 
/* kqueue EVFILT_READ ops: ready when the send queue has data. */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
};
141 
/* kqueue EVFILT_WRITE ops: writes never block, so always ready. */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
};
148 
/* SMR-protected list type of all tun/tap softcs (see tun_devs_list). */
SMR_LIST_HEAD(tun_list, tun_softc);

/* Interface cloners; "tun" is layer 3, "tap" is layer 2 (ethernet). */
struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
156 
/*
 * Driver attach hook: register both cloners so "tun" and "tap"
 * interfaces can be created.  `n` is unused.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}
163 
/* Cloner callback for "tunN": create a layer-3 interface. */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
169 
/* Cloner callback for "tapN": create a layer-2 (ethernet) interface. */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
175 
176 struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
177 
/*
 * Find the softc whose interface name matches `name`, or NULL.
 * Caller must hold the kernel lock (this is the locked-iteration
 * variant of walking tun_devs_list, not an SMR read section).
 */
struct tun_softc *
tun_name_lookup(const char *name)
{
	struct tun_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
		if (strcmp(sc->sc_if.if_xname, name) == 0)
			return (sc);
	}

	return (NULL);
}
192 
193 int
194 tun_insert(struct tun_softc *sc)
195 {
196 	int error = 0;
197 
198 	/* check for a race */
199 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
200 		error = EEXIST;
201 	else {
202 		/* tun_name_lookup checks for the right lock already */
203 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
204 	}
205 
206 	return (error);
207 }
208 
/*
 * Common create path for tun (flags == 0) and tap (TUN_LAYER2).
 * Allocates the softc, publishes it on tun_devs_list early (so
 * tun_dev_open can find it and wait), then finishes building the
 * interface and marks it TUN_INITED.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	/* the unit must be encodable as a minor number */
	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel with an AF header word */
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: behave like an ethernet with a made-up MAC */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);
	refcnt_init(&sc->sc_refs);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
280 
/*
 * Tear an interface down.  The ordering matters: mark the softc dead,
 * revoke any open vnode (forcing tunclose), unlink from the SMR list,
 * wait out all in-flight device entrypoints, then detach and free.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;
	int			 s;

	KERNEL_ASSERT_LOCKED();

	/* only one destroy may proceed */
	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		/* revoke ran tun_dev_close, which clears sc_dev */
		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	s = splhigh();
	klist_invalidate(&sc->sc_rsel.si_note);
	klist_invalidate(&sc->sc_wsel.si_note);
	splx(s);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}
330 
/*
 * Map a character device to its open softc and take a reference on it.
 * Runs inside an SMR read section so it is safe against concurrent
 * unlinking by tun_clone_destroy.  Returns NULL if no open interface
 * matches `dev` (the loop leaves sc == NULL on exhaustion).
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}
347 
/*
 * Drop a reference taken by tun_get; wakes tun_clone_destroy if it is
 * waiting in refcnt_finalize for the last reference.
 */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
353 
/* cdevsw open entrypoint for /dev/tunN. */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}
359 
/* cdevsw open entrypoint for /dev/tapN. */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
365 
/*
 * Common open path: find (or clone) the interface named after the
 * device minor, wait until tun_create finished initialising it, then
 * claim the device by storing `dev` in sc_dev.  If this open created
 * the interface, TUN_STAYUP is cleared so it is destroyed again on
 * close.  Returns EBUSY if another descriptor already has it open.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	/* create the interface in the opener's routing domain */
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((ifp = ifunit(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	sc = ifp->if_softc;
	/* wait for it to be fully constructed before we use it */
	while (!ISSET(sc->sc_flags, TUN_INITED)) {
		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			return (error);
		}
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		return (EBUSY);
	}
	/* it's ours now */
	sc->sc_dev = dev;
	/* stayup is TUN_STAYUP only if this open cloned the interface */
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	tun_link_state(sc, LINK_STATE_FULL_DUPLEX);

	return (0);
}
418 
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
428 
/* cdevsw close entrypoint for /dev/tapN; same semantics as tunclose. */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
434 
/*
 * Common close path: purge queued packets, release async-I/O state,
 * and either tear the interface down (no TUN_STAYUP) or just mark it
 * down.  The interface name is copied out before tun_put because the
 * softc may be freed by a concurrent destroy once our ref is dropped.
 */
int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 error = 0;
	char			 name[IFNAMSIZ];
	int			 destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	ifq_purge(&ifp->if_snd);

	/* wake poll/kevent waiters and drop SIGIO registration */
	CLR(sc->sc_flags, TUN_ASYNC);
	selwakeup(&sc->sc_rsel);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			/*
			 * NOTE(review): IFF_UP|IFF_RUNNING were already
			 * cleared above; this second CLR looks redundant —
			 * confirm whether it guards against a racing
			 * SIOCSIFFLAGS before removing it.
			 */
			CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
			tun_link_state(sc, LINK_STATE_DOWN);
		}
	}

	/* release the device; a future open may claim the interface */
	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}
480 
/*
 * Recompute the address-derived flags (TUN_IASET, TUN_DSTADDR,
 * TUN_BRDADDR) from the interface's current address list and mark the
 * interface up and running.  Called from tun_ioctl on address changes.
 * Always returns 0.
 */
int
tun_init(struct tun_softc *sc)
{
	struct ifnet	*ifp = &sc->sc_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from scratch; the loop below re-derives these */
	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_BRDADDR;
			} else
				sc->sc_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
535 
/*
 * Process an ioctl request on the network interface (as opposed to the
 * character device — see tun_dev_ioctl for those).  Layer-2 instances
 * fall through to ether_ioctl for anything unhandled.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		tun_init(sc);
		break;
	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFDSTADDR:
		tun_init(sc);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}
579 
580 /*
581  * tun_output - queue packets from higher level ready to put out.
582  */
583 int
584 tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
585     struct rtentry *rt)
586 {
587 	u_int32_t		*af;
588 
589 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
590 		m_freem(m0);
591 		return (EHOSTDOWN);
592 	}
593 
594 	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
595 	if (m0 == NULL)
596 		return (ENOBUFS);
597 	af = mtod(m0, u_int32_t *);
598 	*af = htonl(dst->sa_family);
599 
600 	return (if_enqueue(ifp, m0));
601 }
602 
603 int
604 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
605 {
606 	struct tun_softc	*sc = ifp->if_softc;
607 	int			 error;
608 
609 	error = ifq_enqueue(&ifp->if_snd, m0);
610 	if (error != 0)
611 		return (error);
612 
613 	tun_wakeup(sc);
614 
615 	return (0);
616 }
617 
/*
 * Notify userland that packets are available to read: wake a sleeping
 * tun_dev_read, wake poll/kevent waiters, and deliver SIGIO if async
 * notification was enabled via FIOASYNC.
 */
void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	selwakeup(&sc->sc_rsel);
	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}
628 
/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
637 
/* cdevsw ioctl entrypoint for /dev/tapN. */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
643 
/*
 * Character-device ioctl handler shared by tun and tap: tuninfo
 * get/set, interface mode, async-I/O and ownership fcntls, and (for
 * layer-2 instances) MAC address get/set.  Takes and releases a softc
 * reference around the whole operation.
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		/* the interface type is fixed at creation time */
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* only TUN_IFF_FLAGS bits may be changed this way */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		/* non-blocking is handled per-call via IO_NDELAY */
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		return (sigio_setown(&sc->sc_sigio, cmd, data));
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		/* MAC address get/set only makes sense on tap */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
743 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
753 
/* cdevsw read entrypoint for /dev/tapN; same semantics as tunread. */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
759 
/*
 * Common read path: dequeue one packet from the interface send queue
 * (sleeping unless IO_NDELAY, interruptibly), tap it to bpf, and copy
 * as much of it as fits into the caller's buffer.  Any remainder of
 * the packet is discarded with the mbuf chain.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * sc_reading flags us as asleep so tun_wakeup/tun_clone_destroy
	 * know to wake &ifp->if_snd; sc_dev lets the sleep abort when
	 * the device is closed out from under us.
	 */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* copy out mbuf by mbuf until the packet or the buffer runs out */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
804 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}
813 
/* tap writes carry an ethernet header, so align the IP payload. */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
819 
/*
 * Common write path: one write() is one packet.  Size-check against
 * the interface header length and hard MTU, copy the user data into a
 * freshly built mbuf (cluster-backed if it won't fit in an mbuf
 * header), and inject it into the network stack via if_vinput.
 * `align` is extra leading slack requested by the caller (ETHER_ALIGN
 * for tap) on top of max_linkhdr.
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* size the mbuf to mlen, then trim the alignment slack off the front */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
878 
/*
 * Layer-3 input handler: strip the 4-byte address-family word that
 * tun_output/userland prepended and dispatch the packet to the right
 * protocol input.  Unknown families are dropped.
 */
void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t		af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}
909 
/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
920 
/* cdevsw poll entrypoint for /dev/tapN; same semantics as tunpoll. */
int
tappoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
926 
927 int
928 tun_dev_poll(dev_t dev, int events, struct proc *p)
929 {
930 	struct tun_softc	*sc;
931 	struct ifnet		*ifp;
932 	int			 revents;
933 
934 	sc = tun_get(dev);
935 	if (sc == NULL)
936 		return (POLLERR);
937 
938 	ifp = &sc->sc_if;
939 	revents = 0;
940 
941 	if (events & (POLLIN | POLLRDNORM)) {
942 		if (!ifq_empty(&ifp->if_snd))
943 			revents |= events & (POLLIN | POLLRDNORM);
944 		else
945 			selrecord(p, &sc->sc_rsel);
946 	}
947 	if (events & (POLLOUT | POLLWRNORM))
948 		revents |= events & (POLLOUT | POLLWRNORM);
949 
950 	tun_put(sc);
951 	return (revents);
952 }
953 
/* cdevsw kqfilter entrypoint for /dev/tunN. */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
959 
/* cdevsw kqfilter entrypoint for /dev/tapN. */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
965 
/*
 * Attach a knote to the read or write klist of the softc.  The softc
 * pointer is stashed in kn_hook for the filter/detach callbacks; the
 * reference from tun_get is dropped before returning (the klists are
 * invalidated in tun_clone_destroy before the softc is freed).
 */
int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct klist		*klist;
	int			 error = 0;
	int			 s;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rsel.si_note;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wsel.si_note;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = (caddr_t)sc; /* XXX give the sc_ref to the hook? */

	/* klist manipulation is serialised at splhigh */
	s = splhigh();
	klist_insert(klist, kn);
	splx(s);

put:
	tun_put(sc);
	return (error);
}
1005 
/* Detach a read knote from the softc stashed in kn_hook. */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove(&sc->sc_rsel.si_note, kn);
	splx(s);
}
1016 
1017 int
1018 filt_tunread(struct knote *kn, long hint)
1019 {
1020 	struct tun_softc	*sc = kn->kn_hook;
1021 	struct ifnet		*ifp = &sc->sc_if;
1022 
1023 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1024 
1025 	return (kn->kn_data > 0);
1026 }
1027 
/* Detach a write knote from the softc stashed in kn_hook. */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove(&sc->sc_wsel.si_note, kn);
	splx(s);
}
1038 
1039 int
1040 filt_tunwrite(struct knote *kn, long hint)
1041 {
1042 	struct tun_softc	*sc = kn->kn_hook;
1043 	struct ifnet		*ifp = &sc->sc_if;
1044 
1045 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1046 
1047 	return (1);
1048 }
1049 
/*
 * if_start handler: there is no hardware to kick, so just notify
 * userland readers if the send queue is non-empty.
 */
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}
1060 
1061 void
1062 tun_link_state(struct tun_softc *sc, int link_state)
1063 {
1064 	struct ifnet *ifp = &sc->sc_if;
1065 
1066 	if (ifp->if_link_state != link_state) {
1067 		ifp->if_link_state = link_state;
1068 		if_link_state_change(ifp);
1069 	}
1070 }
1071