xref: /openbsd-src/sys/net/if_tun.c (revision a0747c9f67a4ae71ccb71e62a28d1ea19e06a63c)
1 /*	$OpenBSD: if_tun.c,v 1.231 2021/03/09 20:05:14 anton Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/sigio.h>
47 #include <sys/socket.h>
48 #include <sys/ioctl.h>
49 #include <sys/errno.h>
50 #include <sys/syslog.h>
51 #include <sys/selinfo.h>
52 #include <sys/fcntl.h>
53 #include <sys/time.h>
54 #include <sys/device.h>
55 #include <sys/vnode.h>
56 #include <sys/signalvar.h>
57 #include <sys/poll.h>
58 #include <sys/conf.h>
59 #include <sys/smr.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/rtable.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 
69 #include "bpfilter.h"
70 #if NBPFILTER > 0
71 #include <net/bpf.h>
72 #endif
73 
74 #ifdef MPLS
75 #include <netmpls/mpls.h>
76 #endif /* MPLS */
77 
78 #include <net/if_tun.h>
79 
/*
 * Per-instance state for a tun(4)/tap(4) interface.  Instances live on
 * tun_devs_list and are reference-counted so the character-device
 * entrypoints can safely use them while a destroy is pending.
 */
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct selinfo		sc_rsel;	/* read select */
	struct selinfo		sc_wsel;	/* write select (not used) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)	/* destroy in progress */

	dev_t			sc_dev;		/* non-zero while open */
	struct refcnt		sc_refs;	/* held by dev entrypoints */
	unsigned int		sc_reading;	/* a read() sleeps on if_snd */
};
96 
97 #ifdef	TUN_DEBUG
98 int	tundebug = TUN_DEBUG;
99 #define TUNDEBUG(a)	(tundebug? printf a : 0)
100 #else
101 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
102 #endif
103 
104 /* Only these IFF flags are changeable by TUNSIFINFO */
105 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
106 
107 void	tunattach(int);
108 
109 int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
110 int	tun_dev_close(dev_t, struct proc *);
111 int	tun_dev_ioctl(dev_t, u_long, void *);
112 int	tun_dev_read(dev_t, struct uio *, int);
113 int	tun_dev_write(dev_t, struct uio *, int, int);
114 int	tun_dev_poll(dev_t, int, struct proc *);
115 int	tun_dev_kqfilter(dev_t, struct knote *);
116 
117 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
118 void	tun_input(struct ifnet *, struct mbuf *);
119 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
120 	    struct rtentry *);
121 int	tun_enqueue(struct ifnet *, struct mbuf *);
122 int	tun_clone_create(struct if_clone *, int);
123 int	tap_clone_create(struct if_clone *, int);
124 int	tun_create(struct if_clone *, int, int);
125 int	tun_clone_destroy(struct ifnet *);
126 void	tun_wakeup(struct tun_softc *);
127 int	tun_init(struct tun_softc *);
128 void	tun_start(struct ifnet *);
129 int	filt_tunread(struct knote *, long);
130 int	filt_tunwrite(struct knote *, long);
131 void	filt_tunrdetach(struct knote *);
132 void	filt_tunwdetach(struct knote *);
133 void	tun_link_state(struct tun_softc *, int);
134 
/* kqueue EVFILT_READ: fires when the send queue holds a packet */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
};
141 
/* kqueue EVFILT_WRITE: always ready, writes never block */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
};
148 
/* list head type for all tun/tap softcs */
SMR_LIST_HEAD(tun_list, tun_softc);

/* "tun" clones are layer 3, "tap" clones are layer 2; both share destroy */
struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
156 
/*
 * tunattach - pseudo-device attach hook; registers the tun and tap
 * interface cloners with the network stack.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}
163 
/* clone-create callback for tun(4): layer 3 point-to-point mode */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

/* clone-create callback for tap(4): layer 2 ethernet mode */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
175 
/* all tun/tap interfaces; writers hold the kernel lock, readers use SMR */
struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
177 
178 struct tun_softc *
179 tun_name_lookup(const char *name)
180 {
181 	struct tun_softc *sc;
182 
183 	KERNEL_ASSERT_LOCKED();
184 
185 	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
186 		if (strcmp(sc->sc_if.if_xname, name) == 0)
187 			return (sc);
188 	}
189 
190 	return (NULL);
191 }
192 
193 int
194 tun_insert(struct tun_softc *sc)
195 {
196 	int error = 0;
197 
198 	/* check for a race */
199 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
200 		error = EEXIST;
201 	else {
202 		/* tun_name_lookup checks for the right lock already */
203 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
204 	}
205 
206 	return (error);
207 }
208 
/*
 * tun_create - common clone-create path for tun(4) and tap(4).
 * Allocates the softc, publishes it on tun_devs_list (so tun_dev_open
 * can find it before attach completes), then attaches the interface in
 * layer 3 (point-to-point tunnel) or layer 2 (fake ethernet) mode
 * depending on flags.  Returns 0 on success, ENXIO for out-of-range
 * units, EEXIST on a name collision.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	/* the unit must be representable as a device minor number */
	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: AF-tagged point-to-point tunnel */
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		/* packets carry a leading u_int32_t address family */
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: fake ethernet with a generated MAC address */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);
	refcnt_init(&sc->sc_refs);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
283 
/*
 * tun_clone_destroy - tear down a tun/tap interface.  The ordering
 * below is deliberate: mark the softc dead, revoke any open device
 * vnode, unlink the softc from tun_devs_list and wait out SMR
 * readers, then wait for all device entrypoints to drain before
 * detaching the interface and freeing the softc.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;
	int			 s;

	KERNEL_ASSERT_LOCKED();

	/* only one destructor may run; later attempts see TUN_DEAD */
	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		/* the revoke closed the device; tun_dev_close zeroes sc_dev */
		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	s = splhigh();
	klist_invalidate(&sc->sc_rsel.si_note);
	klist_invalidate(&sc->sc_wsel.si_note);
	splx(s);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}
333 
/*
 * tun_get - look up a softc by device number inside an SMR read
 * section and take a reference on it.  Returns NULL when no open
 * device matches.  The caller drops the reference with tun_put().
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			/* the ref keeps sc alive after we leave the SMR section */
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}
350 
/* tun_put - release a tun_get() reference; wakes refcnt_finalize waiters */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
356 
/* cdevsw open entrypoint for /dev/tunN (layer 3) */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}

/* cdevsw open entrypoint for /dev/tapN (layer 2) */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
368 
/*
 * tun_dev_open - common open path.  Finds the interface named after
 * the device minor (cloning it on demand), waits for tun_create to
 * finish initialising it, then claims it for this device.  Only one
 * open at a time is allowed; a second open returns EBUSY.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((ifp = if_unit(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			/* we made it, so destroy it again on last close */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	sc = ifp->if_softc;
	/* wait for it to be fully constructed before we use it */
	while (!ISSET(sc->sc_flags, TUN_INITED)) {
		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			if_put(ifp);
			return (error);
		}
	}

	/* non-zero sc_dev means another open already claimed the device */
	if (sc->sc_dev != 0) {
		/* aww, we lost */
		if_put(ifp);
		return (EBUSY);
	}
	/* it's ours now */
	sc->sc_dev = dev;
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	if_put(ifp);
	tun_link_state(sc, LINK_STATE_FULL_DUPLEX);

	return (0);
}
424 
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

/* cdevsw close entrypoint for /dev/tapN */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
440 
441 int
442 tun_dev_close(dev_t dev, struct proc *p)
443 {
444 	struct tun_softc	*sc;
445 	struct ifnet		*ifp;
446 	int			 error = 0;
447 	char			 name[IFNAMSIZ];
448 	int			 destroy = 0;
449 
450 	sc = tun_get(dev);
451 	if (sc == NULL)
452 		return (ENXIO);
453 
454 	ifp = &sc->sc_if;
455 
456 	/*
457 	 * junk all pending output
458 	 */
459 	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
460 	ifq_purge(&ifp->if_snd);
461 
462 	CLR(sc->sc_flags, TUN_ASYNC);
463 	selwakeup(&sc->sc_rsel);
464 	sigio_free(&sc->sc_sigio);
465 
466 	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
467 		/* we can't hold a reference to sc before we start a dtor */
468 		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
469 			destroy = 1;
470 			strlcpy(name, ifp->if_xname, sizeof(name));
471 		} else {
472 			CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
473 			tun_link_state(sc, LINK_STATE_DOWN);
474 		}
475 	}
476 
477 	sc->sc_dev = 0;
478 
479 	tun_put(sc);
480 
481 	if (destroy)
482 		if_clone_destroy(name);
483 
484 	return (error);
485 }
486 
/*
 * tun_init - re-derive the address-dependent flag bits (TUN_IASET,
 * TUN_DSTADDR, TUN_BRDADDR) from the interface address list and mark
 * the interface up and running.  Called from tun_ioctl when an
 * address is set.  Always returns 0.
 */
int
tun_init(struct tun_softc *sc)
{
	struct ifnet	*ifp = &sc->sc_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from a clean slate, then scan the current addresses */
	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_BRDADDR;
			} else
				sc->sc_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
541 
542 /*
543  * Process an ioctl request.
544  */
545 int
546 tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
547 {
548 	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
549 	struct ifreq		*ifr = (struct ifreq *)data;
550 	int			 error = 0;
551 
552 	switch (cmd) {
553 	case SIOCSIFADDR:
554 		tun_init(sc);
555 		break;
556 	case SIOCSIFFLAGS:
557 		if (ISSET(ifp->if_flags, IFF_UP))
558 			SET(ifp->if_flags, IFF_RUNNING);
559 		else
560 			CLR(ifp->if_flags, IFF_RUNNING);
561 		break;
562 
563 	case SIOCSIFDSTADDR:
564 		tun_init(sc);
565 		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
566 		break;
567 	case SIOCSIFMTU:
568 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
569 			error = EINVAL;
570 		else
571 			ifp->if_mtu = ifr->ifr_mtu;
572 		break;
573 	case SIOCADDMULTI:
574 	case SIOCDELMULTI:
575 		break;
576 	default:
577 		if (sc->sc_flags & TUN_LAYER2)
578 			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
579 		else
580 			error = ENOTTY;
581 	}
582 
583 	return (error);
584 }
585 
586 /*
587  * tun_output - queue packets from higher level ready to put out.
588  */
589 int
590 tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
591     struct rtentry *rt)
592 {
593 	u_int32_t		*af;
594 
595 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
596 		m_freem(m0);
597 		return (EHOSTDOWN);
598 	}
599 
600 	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
601 	if (m0 == NULL)
602 		return (ENOBUFS);
603 	af = mtod(m0, u_int32_t *);
604 	*af = htonl(dst->sa_family);
605 
606 	return (if_enqueue(ifp, m0));
607 }
608 
609 int
610 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
611 {
612 	struct tun_softc	*sc = ifp->if_softc;
613 	int			 error;
614 
615 	error = ifq_enqueue(&ifp->if_snd, m0);
616 	if (error != 0)
617 		return (error);
618 
619 	tun_wakeup(sc);
620 
621 	return (0);
622 }
623 
624 void
625 tun_wakeup(struct tun_softc *sc)
626 {
627 	if (sc->sc_reading)
628 		wakeup(&sc->sc_if.if_snd);
629 
630 	selwakeup(&sc->sc_rsel);
631 	if (sc->sc_flags & TUN_ASYNC)
632 		pgsigio(&sc->sc_sigio, SIGIO, 0);
633 }
634 
/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

/* cdevsw ioctl entrypoint for /dev/tapN */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
649 
/*
 * tun_dev_ioctl - common character-device ioctl path.  Handles the
 * tun-specific info/mode commands, the generic async-I/O plumbing
 * (FIOASYNC, FIONREAD, ownership), and - for tap only - reading and
 * writing the fake hardware address.
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		/* set mtu/flags/baudrate; the interface type may not change */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* userland may only touch the TUN_IFF_FLAGS bits */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be given */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		/* non-blocking reads are signalled per-call via IO_NDELAY */
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* bytes in the packet at the head of the send queue */
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		/* read the fake hardware address (tap only) */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		/* set the fake hardware address (tap only) */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
750 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

/* cdevsw read entrypoint for /dev/tapN */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
766 
/*
 * tun_dev_read - dequeue the next packet from the interface send
 * queue and copy it out to userland.  A read consumes the whole
 * packet: whatever does not fit in the user buffer is discarded with
 * the rest of the mbuf chain.  Sleeps unless IO_NDELAY is set.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* sc_reading tells tun_wakeup/destroy there is a sleeper here */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* copy out as much of the chain as the caller asked for */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
811 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}

/* tap prepends ETHER_ALIGN of slack in front of the frame */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
826 
/*
 * tun_dev_write - build one packet from one atomic write and inject
 * it into the network stack via if_vinput.  The write size must lie
 * within [if_hdrlen, if_hdrlen + if_hardmtu].  "align" extra bytes
 * (plus max_linkhdr) are reserved in front and trimmed off again so
 * the payload ends up at a convenient offset in the mbuf storage.
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	/* leave space so link headers can be prepended without realloc */
	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		/* payload does not fit the header mbuf; add a cluster */
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* place the data, then cut the alignment slack back off the front */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
885 
886 void
887 tun_input(struct ifnet *ifp, struct mbuf *m0)
888 {
889 	uint32_t		af;
890 
891 	KASSERT(m0->m_len >= sizeof(af));
892 
893 	af = *mtod(m0, uint32_t *);
894 	/* strip the tunnel header */
895 	m_adj(m0, sizeof(af));
896 
897 	switch (ntohl(af)) {
898 	case AF_INET:
899 		ipv4_input(ifp, m0);
900 		break;
901 #ifdef INET6
902 	case AF_INET6:
903 		ipv6_input(ifp, m0);
904 		break;
905 #endif
906 #ifdef MPLS
907 	case AF_MPLS:
908 		mpls_input(ifp, m0);
909 		break;
910 #endif
911 	default:
912 		m_freem(m0);
913 		break;
914 	}
915 }
916 
/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}

/* cdevsw poll entrypoint for /dev/tapN */
int
tappoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
933 
934 int
935 tun_dev_poll(dev_t dev, int events, struct proc *p)
936 {
937 	struct tun_softc	*sc;
938 	struct ifnet		*ifp;
939 	int			 revents;
940 
941 	sc = tun_get(dev);
942 	if (sc == NULL)
943 		return (POLLERR);
944 
945 	ifp = &sc->sc_if;
946 	revents = 0;
947 
948 	if (events & (POLLIN | POLLRDNORM)) {
949 		if (!ifq_empty(&ifp->if_snd))
950 			revents |= events & (POLLIN | POLLRDNORM);
951 		else
952 			selrecord(p, &sc->sc_rsel);
953 	}
954 	if (events & (POLLOUT | POLLWRNORM))
955 		revents |= events & (POLLOUT | POLLWRNORM);
956 
957 	tun_put(sc);
958 	return (revents);
959 }
960 
/* cdevsw kqfilter entrypoint for /dev/tunN */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

/* cdevsw kqfilter entrypoint for /dev/tapN */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
972 
973 int
974 tun_dev_kqfilter(dev_t dev, struct knote *kn)
975 {
976 	struct tun_softc	*sc;
977 	struct ifnet		*ifp;
978 	struct klist		*klist;
979 	int			 error = 0;
980 	int			 s;
981 
982 	sc = tun_get(dev);
983 	if (sc == NULL)
984 		return (ENXIO);
985 
986 	ifp = &sc->sc_if;
987 
988 	switch (kn->kn_filter) {
989 	case EVFILT_READ:
990 		klist = &sc->sc_rsel.si_note;
991 		kn->kn_fop = &tunread_filtops;
992 		break;
993 	case EVFILT_WRITE:
994 		klist = &sc->sc_wsel.si_note;
995 		kn->kn_fop = &tunwrite_filtops;
996 		break;
997 	default:
998 		error = EINVAL;
999 		goto put;
1000 	}
1001 
1002 	kn->kn_hook = (caddr_t)sc; /* XXX give the sc_ref to the hook? */
1003 
1004 	s = splhigh();
1005 	klist_insert_locked(klist, kn);
1006 	splx(s);
1007 
1008 put:
1009 	tun_put(sc);
1010 	return (error);
1011 }
1012 
1013 void
1014 filt_tunrdetach(struct knote *kn)
1015 {
1016 	int			 s;
1017 	struct tun_softc	*sc = kn->kn_hook;
1018 
1019 	s = splhigh();
1020 	klist_remove_locked(&sc->sc_rsel.si_note, kn);
1021 	splx(s);
1022 }
1023 
1024 int
1025 filt_tunread(struct knote *kn, long hint)
1026 {
1027 	struct tun_softc	*sc = kn->kn_hook;
1028 	struct ifnet		*ifp = &sc->sc_if;
1029 
1030 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1031 
1032 	return (kn->kn_data > 0);
1033 }
1034 
1035 void
1036 filt_tunwdetach(struct knote *kn)
1037 {
1038 	int			 s;
1039 	struct tun_softc	*sc = kn->kn_hook;
1040 
1041 	s = splhigh();
1042 	klist_remove_locked(&sc->sc_wsel.si_note, kn);
1043 	splx(s);
1044 }
1045 
1046 int
1047 filt_tunwrite(struct knote *kn, long hint)
1048 {
1049 	struct tun_softc	*sc = kn->kn_hook;
1050 	struct ifnet		*ifp = &sc->sc_if;
1051 
1052 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1053 
1054 	return (1);
1055 }
1056 
1057 void
1058 tun_start(struct ifnet *ifp)
1059 {
1060 	struct tun_softc	*sc = ifp->if_softc;
1061 
1062 	splassert(IPL_NET);
1063 
1064 	if (ifq_len(&ifp->if_snd))
1065 		tun_wakeup(sc);
1066 }
1067 
1068 void
1069 tun_link_state(struct tun_softc *sc, int link_state)
1070 {
1071 	struct ifnet *ifp = &sc->sc_if;
1072 
1073 	if (ifp->if_link_state != link_state) {
1074 		ifp->if_link_state = link_state;
1075 		if_link_state_change(ifp);
1076 	}
1077 }
1078