xref: /openbsd-src/sys/net/if_tun.c (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1 /*	$OpenBSD: if_tun.c,v 1.238 2023/02/10 14:39:18 visa Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/sigio.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/fcntl.h>
51 #include <sys/time.h>
52 #include <sys/device.h>
53 #include <sys/vnode.h>
54 #include <sys/signalvar.h>
55 #include <sys/conf.h>
56 #include <sys/event.h>
57 #include <sys/mutex.h>
58 #include <sys/smr.h>
59 
60 #include <net/if.h>
61 #include <net/if_types.h>
62 #include <net/netisr.h>
63 #include <net/rtable.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/if_ether.h>
67 
68 #include "bpfilter.h"
69 #if NBPFILTER > 0
70 #include <net/bpf.h>
71 #endif
72 
73 #ifdef MPLS
74 #include <netmpls/mpls.h>
75 #endif /* MPLS */
76 
77 #include <net/if_tun.h>
78 
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct mutex		sc_mtx;		/* backs the klists below */
	struct klist		sc_rklist;	/* knotes for read */
	struct klist		sc_wklist;	/* knotes for write (unused) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)	/* set once destruction starts */

	dev_t			sc_dev;		/* device that has us open, or 0 */
	struct refcnt		sc_refs;	/* held across dev entrypoints */
	unsigned int		sc_reading;	/* a read(2) sleeps on if_snd */
};
96 
/* debug printfs, compiled in only when TUN_DEBUG is defined above */
#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
106 
void	tunattach(int);

/* character device entrypoints shared by the tun(4) and tap(4) front ends */
int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
int	tun_dev_close(dev_t, struct proc *);
int	tun_dev_ioctl(dev_t, u_long, void *);
int	tun_dev_read(dev_t, struct uio *, int);
int	tun_dev_write(dev_t, struct uio *, int, int);
int	tun_dev_kqfilter(dev_t, struct knote *);

/* network interface glue */
int	tun_ioctl(struct ifnet *, u_long, caddr_t);
void	tun_input(struct ifnet *, struct mbuf *);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tun_enqueue(struct ifnet *, struct mbuf *);
int	tun_clone_create(struct if_clone *, int);
int	tap_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
void	tun_wakeup(struct tun_softc *);
int	tun_init(struct tun_softc *);
void	tun_start(struct ifnet *);
/* kqueue filter implementation */
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
int	filt_tunmodify(struct kevent *, struct knote *);
int	filt_tunprocess(struct knote *, struct kevent *);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tun_link_state(struct ifnet *, int);
135 
/* read filter: becomes ready when packets are queued on if_snd */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};
144 
/* write filter: the device is always ready for writing */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};
153 
SMR_LIST_HEAD(tun_list, tun_softc);

/* both cloners share the implementation; tap adds the TUN_LAYER2 flag */
struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
161 
/*
 * Pseudo-device attach routine; `n' (the count from config) is unused.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}
168 
/* clone a layer 3 tun(4) interface */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
174 
/* clone a layer 2 tap(4) interface */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
180 
/* all tun/tap softcs; written under the kernel lock, read under SMR */
struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
182 
183 struct tun_softc *
184 tun_name_lookup(const char *name)
185 {
186 	struct tun_softc *sc;
187 
188 	KERNEL_ASSERT_LOCKED();
189 
190 	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
191 		if (strcmp(sc->sc_if.if_xname, name) == 0)
192 			return (sc);
193 	}
194 
195 	return (NULL);
196 }
197 
198 int
199 tun_insert(struct tun_softc *sc)
200 {
201 	int error = 0;
202 
203 	/* check for a race */
204 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
205 		error = EEXIST;
206 	else {
207 		/* tun_name_lookup checks for the right lock already */
208 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
209 	}
210 
211 	return (error);
212 }
213 
/*
 * Common clone-create path for tun(4) and tap(4).  Builds the softc,
 * publishes it on the global list, then attaches the network interface
 * in either layer 3 (tun) or layer 2 (tap) flavour.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	/* the unit must be representable as a device minor number */
	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	mtx_init(&sc->sc_mtx, IPL_NET);
	klist_init_mutex(&sc->sc_rklist, &sc->sc_mtx);
	klist_init_mutex(&sc->sc_wklist, &sc->sc_mtx);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3 (tun): point-to-point with a 32bit AF header */
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2 (tap): looks like an ethernet interface */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	/* lost the insert race against another create of the same name */
	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
294 
/*
 * Tear an interface down: revoke any open device node, unlink the
 * softc so new opens can't find it, wait for in-flight device
 * entrypoints to drain, then detach and free everything.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;

	KERNEL_ASSERT_LOCKED();

	/* only one destructor is allowed to run */
	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		/* the revoke must have closed the device */
		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	klist_invalidate(&sc->sc_rklist);
	klist_invalidate(&sc->sc_wklist);

	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}
344 
/*
 * Look up the softc that dev has open and take a reference on it,
 * inside an SMR read section so it can't be freed underneath us.
 * Returns NULL if no interface currently owns the device.
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	/* sc is NULL when the loop ran off the end of the list */
	return (sc);
}
361 
/* drop a tun_get() reference; wakes refcnt_finalize() in the destructor */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
367 
/* cdevsw open for tun(4); the work happens in tun_dev_open */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}
373 
/* cdevsw open for tap(4); the work happens in tun_dev_open */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
379 
/*
 * Open the device: find (or clone) the interface named after the
 * device minor, wait for it to be fully constructed, then claim it for
 * this dev_t and bring it up.  Fails with EBUSY if another dev_t
 * already has the interface open.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}

	/* it's ours now */
	sc->sc_dev = dev;
	/* if we cloned it above, clear STAYUP so the last close destroys it */
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}
463 
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
473 
/* cdevsw close for tap(4); shares the tun implementation */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
479 
/*
 * Close the device: discard queued output, release the dev_t claim,
 * and either mark the link down (TUN_STAYUP, i.e. ifconfig-created) or
 * destroy the interface entirely (created implicitly by open).
 */
int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 error = 0;
	char			 name[IFNAMSIZ];
	int			 destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	/* stop async notification */
	CLR(sc->sc_flags, TUN_ASYNC);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			/* destroy below, after our reference is dropped */
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	/* release the device so a future open can claim the interface */
	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}
525 
526 int
527 tun_init(struct tun_softc *sc)
528 {
529 	struct ifnet	*ifp = &sc->sc_if;
530 	struct ifaddr	*ifa;
531 
532 	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));
533 
534 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
535 
536 	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
537 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
538 		if (ifa->ifa_addr->sa_family == AF_INET) {
539 			struct sockaddr_in *sin;
540 
541 			sin = satosin(ifa->ifa_addr);
542 			if (sin && sin->sin_addr.s_addr)
543 				sc->sc_flags |= TUN_IASET;
544 
545 			if (ifp->if_flags & IFF_POINTOPOINT) {
546 				sin = satosin(ifa->ifa_dstaddr);
547 				if (sin && sin->sin_addr.s_addr)
548 					sc->sc_flags |= TUN_DSTADDR;
549 			} else
550 				sc->sc_flags &= ~TUN_DSTADDR;
551 
552 			if (ifp->if_flags & IFF_BROADCAST) {
553 				sin = satosin(ifa->ifa_broadaddr);
554 				if (sin && sin->sin_addr.s_addr)
555 					sc->sc_flags |= TUN_BRDADDR;
556 			} else
557 				sc->sc_flags &= ~TUN_BRDADDR;
558 		}
559 #ifdef INET6
560 		if (ifa->ifa_addr->sa_family == AF_INET6) {
561 			struct sockaddr_in6 *sin6;
562 
563 			sin6 = satosin6(ifa->ifa_addr);
564 			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
565 				sc->sc_flags |= TUN_IASET;
566 
567 			if (ifp->if_flags & IFF_POINTOPOINT) {
568 				sin6 = satosin6(ifa->ifa_dstaddr);
569 				if (sin6 &&
570 				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
571 					sc->sc_flags |= TUN_DSTADDR;
572 			} else
573 				sc->sc_flags &= ~TUN_DSTADDR;
574 		}
575 #endif /* INET6 */
576 	}
577 
578 	return (0);
579 }
580 
581 /*
582  * Process an ioctl request.
583  */
584 int
585 tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
586 {
587 	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
588 	struct ifreq		*ifr = (struct ifreq *)data;
589 	int			 error = 0;
590 
591 	switch (cmd) {
592 	case SIOCSIFADDR:
593 		tun_init(sc);
594 		break;
595 	case SIOCSIFFLAGS:
596 		if (ISSET(ifp->if_flags, IFF_UP))
597 			SET(ifp->if_flags, IFF_RUNNING);
598 		else
599 			CLR(ifp->if_flags, IFF_RUNNING);
600 		break;
601 
602 	case SIOCSIFDSTADDR:
603 		tun_init(sc);
604 		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
605 		break;
606 	case SIOCSIFMTU:
607 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
608 			error = EINVAL;
609 		else
610 			ifp->if_mtu = ifr->ifr_mtu;
611 		break;
612 	case SIOCADDMULTI:
613 	case SIOCDELMULTI:
614 		break;
615 	default:
616 		if (sc->sc_flags & TUN_LAYER2)
617 			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
618 		else
619 			error = ENOTTY;
620 	}
621 
622 	return (error);
623 }
624 
625 /*
626  * tun_output - queue packets from higher level ready to put out.
627  */
628 int
629 tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
630     struct rtentry *rt)
631 {
632 	u_int32_t		*af;
633 
634 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
635 		m_freem(m0);
636 		return (EHOSTDOWN);
637 	}
638 
639 	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
640 	if (m0 == NULL)
641 		return (ENOBUFS);
642 	af = mtod(m0, u_int32_t *);
643 	*af = htonl(dst->sa_family);
644 
645 	return (if_enqueue(ifp, m0));
646 }
647 
648 int
649 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
650 {
651 	struct tun_softc	*sc = ifp->if_softc;
652 	int			 error;
653 
654 	error = ifq_enqueue(&ifp->if_snd, m0);
655 	if (error != 0)
656 		return (error);
657 
658 	tun_wakeup(sc);
659 
660 	return (0);
661 }
662 
/*
 * Notify all consumers that packets are waiting on if_snd: a sleeping
 * read(2), kqueue readers, and the async I/O process group.
 */
void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	knote(&sc->sc_rklist, 0);

	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}
674 
/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
683 
/* cdevsw ioctl for tap(4); shares the tun implementation */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
689 
/*
 * Device ioctls: tunnel parameters (TUNSIFINFO and friends), generic
 * file ioctls (FIO*/TIOC*), and the link level address for tap(4).
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		/* the interface type is fixed at creation time */
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* only the TUN_IFF_FLAGS subset may be changed */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		/* nothing to do; read() checks IO_NDELAY on each call */
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		/* the link level address only makes sense for tap(4) */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
790 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
800 
/* cdevsw read for tap(4); shares the tun implementation */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
806 
/*
 * Dequeue one packet from the send queue and copy as much of it as
 * fits into the user's buffer; any part that doesn't fit is discarded
 * along with the rest of the mbuf chain.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * Sleep for a packet unless non-blocking; sc_reading tells
	 * tun_wakeup and the destructor to wake us on if_snd.
	 */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* copy out mbuf by mbuf until the chain or the buffer runs out */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
851 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}
860 
/* tap(4) write; ETHER_ALIGN offsets the payload within the mbuf */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
866 
/*
 * Take exactly one packet from userland, copy it into an mbuf and
 * inject it into the interface input path as if it were received.
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* the write must be one whole packet, header included */
	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	/* reserve space so link headers can be prepended without a copy */
	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		/* packet won't fit in the header mbuf; add a cluster */
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* position the data, then trim the alignment slack off the front */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
925 
/*
 * Layer 3 input: the packet begins with a 32bit address family in
 * network byte order (written by tun_output or userland) that selects
 * the protocol input routine.
 */
void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t		af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		/* unknown address family: drop the packet */
		m_freem(m0);
		break;
	}
}
956 
/* cdevsw kqfilter for tun(4) */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
962 
/* cdevsw kqfilter for tap(4) */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
968 
969 int
970 tun_dev_kqfilter(dev_t dev, struct knote *kn)
971 {
972 	struct tun_softc	*sc;
973 	struct ifnet		*ifp;
974 	struct klist		*klist;
975 	int			 error = 0;
976 
977 	sc = tun_get(dev);
978 	if (sc == NULL)
979 		return (ENXIO);
980 
981 	ifp = &sc->sc_if;
982 
983 	switch (kn->kn_filter) {
984 	case EVFILT_READ:
985 		klist = &sc->sc_rklist;
986 		kn->kn_fop = &tunread_filtops;
987 		break;
988 	case EVFILT_WRITE:
989 		klist = &sc->sc_wklist;
990 		kn->kn_fop = &tunwrite_filtops;
991 		break;
992 	default:
993 		error = EINVAL;
994 		goto put;
995 	}
996 
997 	kn->kn_hook = sc;
998 
999 	klist_insert(klist, kn);
1000 
1001 put:
1002 	tun_put(sc);
1003 	return (error);
1004 }
1005 
/* detach a read knote from the softc's read klist */
void
filt_tunrdetach(struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;

	klist_remove(&sc->sc_rklist, kn);
}
1013 
1014 int
1015 filt_tunread(struct knote *kn, long hint)
1016 {
1017 	struct tun_softc	*sc = kn->kn_hook;
1018 	struct ifnet		*ifp = &sc->sc_if;
1019 
1020 	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
1021 
1022 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1023 
1024 	return (kn->kn_data > 0);
1025 }
1026 
/* detach a write knote from the softc's write klist */
void
filt_tunwdetach(struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;

	klist_remove(&sc->sc_wklist, kn);
}
1034 
1035 int
1036 filt_tunwrite(struct knote *kn, long hint)
1037 {
1038 	struct tun_softc	*sc = kn->kn_hook;
1039 	struct ifnet		*ifp = &sc->sc_if;
1040 
1041 	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
1042 
1043 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1044 
1045 	return (1);
1046 }
1047 
/* serialize knote_modify() against the filter routines with sc_mtx */
int
filt_tunmodify(struct kevent *kev, struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;
	int			 active;

	mtx_enter(&sc->sc_mtx);
	active = knote_modify(kev, kn);
	mtx_leave(&sc->sc_mtx);

	return (active);
}
1060 
/* serialize knote_process() against the filter routines with sc_mtx */
int
filt_tunprocess(struct knote *kn, struct kevent *kev)
{
	struct tun_softc	*sc = kn->kn_hook;
	int			 active;

	mtx_enter(&sc->sc_mtx);
	active = knote_process(kn, kev);
	mtx_leave(&sc->sc_mtx);

	return (active);
}
1073 
/*
 * Interface start routine: nothing is transmitted by hardware here;
 * just notify readers that packets are waiting on if_snd.
 */
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}
1084 
1085 void
1086 tun_link_state(struct ifnet *ifp, int link_state)
1087 {
1088 	if (ifp->if_link_state != link_state) {
1089 		ifp->if_link_state = link_state;
1090 		if_link_state_change(ifp);
1091 	}
1092 }
1093