xref: /openbsd-src/sys/net/if_tun.c (revision 25c4e8bd056e974b28f4a0ffd39d76c190a56013)
1 /*	$OpenBSD: if_tun.c,v 1.237 2022/07/02 08:50:42 visa Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/sigio.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/fcntl.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/conf.h>
57 #include <sys/smr.h>
58 
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/netisr.h>
62 #include <net/rtable.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/if_ether.h>
66 
67 #include "bpfilter.h"
68 #if NBPFILTER > 0
69 #include <net/bpf.h>
70 #endif
71 
72 #ifdef MPLS
73 #include <netmpls/mpls.h>
74 #endif /* MPLS */
75 
76 #include <net/if_tun.h>
77 
/*
 * Per-interface softc, shared by tun(4) (layer 3) and tap(4) (layer 2).
 * Lookup is via the SMR list tun_devs_list; lifetime against concurrent
 * device entrypoints is guarded by sc_refs.
 */
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct selinfo		sc_rsel;	/* read select */
	struct selinfo		sc_wsel;	/* write select (not used) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)	/* set once destroy starts */

	dev_t			sc_dev;		/* non-zero while a process has us open */
	struct refcnt		sc_refs;	/* held across device entrypoints */
	unsigned int		sc_reading;	/* a reader sleeps on if_snd */
};
94 
95 #ifdef	TUN_DEBUG
96 int	tundebug = TUN_DEBUG;
97 #define TUNDEBUG(a)	(tundebug? printf a : 0)
98 #else
99 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
100 #endif
101 
102 /* Only these IFF flags are changeable by TUNSIFINFO */
103 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
104 
105 void	tunattach(int);
106 
107 int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
108 int	tun_dev_close(dev_t, struct proc *);
109 int	tun_dev_ioctl(dev_t, u_long, void *);
110 int	tun_dev_read(dev_t, struct uio *, int);
111 int	tun_dev_write(dev_t, struct uio *, int, int);
112 int	tun_dev_kqfilter(dev_t, struct knote *);
113 
114 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
115 void	tun_input(struct ifnet *, struct mbuf *);
116 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
117 	    struct rtentry *);
118 int	tun_enqueue(struct ifnet *, struct mbuf *);
119 int	tun_clone_create(struct if_clone *, int);
120 int	tap_clone_create(struct if_clone *, int);
121 int	tun_create(struct if_clone *, int, int);
122 int	tun_clone_destroy(struct ifnet *);
123 void	tun_wakeup(struct tun_softc *);
124 int	tun_init(struct tun_softc *);
125 void	tun_start(struct ifnet *);
126 int	filt_tunread(struct knote *, long);
127 int	filt_tunwrite(struct knote *, long);
128 void	filt_tunrdetach(struct knote *);
129 void	filt_tunwdetach(struct knote *);
130 void	tun_link_state(struct ifnet *, int);
131 
/* kqueue EVFILT_READ ops: ready when the send queue holds data. */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
};

/* kqueue EVFILT_WRITE ops: writes never block, so always ready. */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
};
145 
146 SMR_LIST_HEAD(tun_list, tun_softc);
147 
148 struct if_clone tun_cloner =
149     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
150 
151 struct if_clone tap_cloner =
152     IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
153 
/*
 * Driver attach hook: register both the tun (layer 3) and tap (layer 2)
 * interface cloners.  The argument is the config count and is unused.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}

/* Cloner callback for "tunN": create a layer 3 point-to-point interface. */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

/* Cloner callback for "tapN": create a layer 2 ethernet interface. */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
172 
173 struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
174 
175 struct tun_softc *
176 tun_name_lookup(const char *name)
177 {
178 	struct tun_softc *sc;
179 
180 	KERNEL_ASSERT_LOCKED();
181 
182 	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
183 		if (strcmp(sc->sc_if.if_xname, name) == 0)
184 			return (sc);
185 	}
186 
187 	return (NULL);
188 }
189 
190 int
191 tun_insert(struct tun_softc *sc)
192 {
193 	int error = 0;
194 
195 	/* check for a race */
196 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
197 		error = EEXIST;
198 	else {
199 		/* tun_name_lookup checks for the right lock already */
200 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
201 	}
202 
203 	return (error);
204 }
205 
/*
 * Common interface creation for tun(4) and tap(4).  `flags` is either 0
 * (layer 3 tun) or TUN_LAYER2 (tap).  Called with the kernel lock held
 * via the cloner framework.  Returns 0, ENXIO for an out-of-range unit,
 * or EEXIST if the name is already taken.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	/* the unit must be encodable as a minor number for tun_dev_open */
	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel with an AF header word */
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: fake ethernet with a random MAC address */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
281 
/*
 * Tear down a tun/tap interface.  The ordering here matters: first kick
 * userland off the device (revoke), then unpublish the softc from the SMR
 * list, then wait out in-flight device entrypoints before freeing.
 * Called with the kernel lock held via the cloner framework.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;
	int			 s;

	KERNEL_ASSERT_LOCKED();

	/* only one destroy may proceed; TUN_DEAD also stops reopens */
	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		/* the revoke runs tunclose, which must have cleared this */
		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	/* detach any remaining knotes before the klists go away */
	s = splhigh();
	klist_invalidate(&sc->sc_rsel.si_note);
	klist_invalidate(&sc->sc_wsel.si_note);
	splx(s);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}
331 
/*
 * Look up the softc currently bound to `dev` and take a reference on it.
 * Runs inside an SMR read section so it is safe against a concurrent
 * tun_clone_destroy; returns NULL if no open softc matches.
 * The caller must release the reference with tun_put().
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			/* pin the softc before leaving the read section */
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}

/* Drop a reference taken by tun_get(); wakes refcnt_finalize waiters. */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
354 
/* cdevsw open entrypoint for /dev/tunN. */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}

/* cdevsw open entrypoint for /dev/tapN. */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
366 
/*
 * Common open path for tun and tap devices.  Finds (or clones) the
 * interface named after the minor number, waits for it to finish
 * initialising, claims exclusive ownership via sc_dev, and marks the
 * interface up and running.  Returns EBUSY if another open holds the
 * device, ENXIO if it is being destroyed.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		/* tun_create wakes us up once TUN_INITED is set */
		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}
	/* it's ours now */
	sc->sc_dev = dev;
	/* if we cloned it above, clear STAYUP so close destroys it again */
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}
450 
451 /*
452  * tunclose - close the device; if closing the real device, flush pending
453  *  output and unless STAYUP bring down and destroy the interface.
454  */
/* cdevsw close entrypoint for /dev/tunN. */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

/* cdevsw close entrypoint for /dev/tapN. */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
466 
/*
 * Common close path: bring the interface down, junk queued packets,
 * release ownership (sc_dev = 0), and — unless TUN_STAYUP marks the
 * interface as administratively created — destroy the clone.  The
 * destroy must happen after tun_put() since if_clone_destroy waits for
 * all references via refcnt_finalize.
 */
int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 error = 0;
	char			 name[IFNAMSIZ];
	int			 destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	/* tear down async notification state for this open */
	CLR(sc->sc_flags, TUN_ASYNC);
	selwakeup(&sc->sc_rsel);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			/* remember the name; sc may be freed by the dtor */
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	/* release device ownership so the interface can be opened again */
	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}
513 
/*
 * Recompute the TUN_IASET/TUN_DSTADDR/TUN_BRDADDR flags from the
 * interface's current address list and mark it up and running.
 * Called from tun_ioctl when addresses change.  Always returns 0.
 */
int
tun_init(struct tun_softc *sc)
{
	struct ifnet	*ifp = &sc->sc_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from a clean slate and re-derive the flags below */
	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_BRDADDR;
			} else
				sc->sc_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
568 
569 /*
570  * Process an ioctl request.
571  */
/*
 * Process an ioctl request.
 *
 * Network-stack (ifnet) ioctl handler.  Layer 2 (tap) interfaces fall
 * through to ether_ioctl for anything not handled here; layer 3 (tun)
 * interfaces reject unknown requests with ENOTTY.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		/* re-derive the address flags */
		tun_init(sc);
		break;
	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFDSTADDR:
		tun_init(sc);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership is a no-op for tunnels */
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}
612 
613 /*
614  * tun_output - queue packets from higher level ready to put out.
615  */
/*
 * tun_output - queue packets from higher level ready to put out.
 *
 * Layer 3 output: prepend the 4-byte address-family header that tunread
 * exposes to userland (matching the DLT_LOOP bpf attachment), then hand
 * the packet to if_enqueue.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	u_int32_t		*af;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	/* AF header is in network byte order, as with DLT_LOOP */
	*af = htonl(dst->sa_family);

	return (if_enqueue(ifp, m0));
}
635 
636 int
637 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
638 {
639 	struct tun_softc	*sc = ifp->if_softc;
640 	int			 error;
641 
642 	error = ifq_enqueue(&ifp->if_snd, m0);
643 	if (error != 0)
644 		return (error);
645 
646 	tun_wakeup(sc);
647 
648 	return (0);
649 }
650 
/*
 * Notify userland that the send queue has data: wake a sleeping read(),
 * wake select/poll/kevent waiters, and deliver SIGIO if requested.
 */
void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	selwakeup(&sc->sc_rsel);
	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}
661 
662 /*
663  * the cdevsw interface is now pretty minimal.
664  */
/*
 * the cdevsw interface is now pretty minimal.
 */

/* cdevsw ioctl entrypoint for /dev/tunN. */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

/* cdevsw ioctl entrypoint for /dev/tapN. */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
676 
/*
 * Common device ioctl handler for tun and tap.  Handles the tun-specific
 * TUNS*/TUNG* requests plus the generic FIO*/TIOC* async-I/O controls.
 * SIOC[GS]IFADDR here operate on the ethernet MAC and are only valid on
 * layer 2 (tap) interfaces.
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		/* the interface type is fixed at creation time */
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* only the TUN_IFF_FLAGS subset may be changed */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		/* non-blocking mode is handled per-call via IO_NDELAY */
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* byte count of the first queued packet, if any */
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
777 
778 /*
779  * The cdevsw read interface - reads a packet at a time, or at
780  * least as much of a packet as can be read.
781  */
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */

/* cdevsw read entrypoint for /dev/tunN. */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

/* cdevsw read entrypoint for /dev/tapN. */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
793 
/*
 * Common device read: dequeue one packet from the send queue (sleeping
 * unless IO_NDELAY) and copy it out to userland.  Any part of the packet
 * that does not fit in the uio is silently discarded — reads are always
 * whole-packet.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * sc_reading lets tun_wakeup/tun_clone_destroy know to wake us;
	 * sleeping on &ifp->if_snd matches those wakeup() calls.
	 */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* copy the mbuf chain out until the uio or the packet runs dry */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
838 
839 /*
840  * the cdevsw write interface - an atomic write is a packet - or else!
841  */
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */

/* cdevsw write entrypoint for /dev/tunN; no extra payload alignment. */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}

/* cdevsw write entrypoint for /dev/tapN; align the ethernet payload. */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
853 
/*
 * Common device write: one write is one packet.  The uio is copied into
 * a single mbuf (cluster-backed if needed), with `align` extra leading
 * bytes (plus max_linkhdr) reserved and then trimmed off so the payload
 * lands with the requested alignment.  The packet is then injected into
 * the stack via if_vinput.
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* a write must be a whole packet within the interface's limits */
	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* reserve the alignment bytes, then trim them off the front */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
912 
/*
 * Layer 3 input handler: strip the 4-byte network-byte-order address
 * family header written by tun_output and dispatch the payload to the
 * matching protocol input routine.  Unknown families are dropped.
 */
void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t		af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}
943 
/* cdevsw kqfilter entrypoint for /dev/tunN. */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

/* cdevsw kqfilter entrypoint for /dev/tapN. */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
955 
/*
 * Common kqfilter attach: hook the knote onto the read or write klist.
 * Only EVFILT_READ and EVFILT_WRITE are supported; anything else gets
 * EINVAL.
 */
int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct klist		*klist;
	int			 error = 0;
	int			 s;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rsel.si_note;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wsel.si_note;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = (caddr_t)sc; /* XXX give the sc_ref to the hook? */

	/* the klists are protected by splhigh; see filt_tun[rw]detach */
	s = splhigh();
	klist_insert_locked(klist, kn);
	splx(s);

put:
	tun_put(sc);
	return (error);
}
995 
/* Detach a read knote from the softc's read klist. */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove_locked(&sc->sc_rsel.si_note, kn);
	splx(s);
}
1006 
1007 int
1008 filt_tunread(struct knote *kn, long hint)
1009 {
1010 	struct tun_softc	*sc = kn->kn_hook;
1011 	struct ifnet		*ifp = &sc->sc_if;
1012 
1013 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1014 
1015 	return (kn->kn_data > 0);
1016 }
1017 
/* Detach a write knote from the softc's write klist. */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove_locked(&sc->sc_wsel.si_note, kn);
	splx(s);
}
1028 
1029 int
1030 filt_tunwrite(struct knote *kn, long hint)
1031 {
1032 	struct tun_softc	*sc = kn->kn_hook;
1033 	struct ifnet		*ifp = &sc->sc_if;
1034 
1035 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1036 
1037 	return (1);
1038 }
1039 
/*
 * if_start handler: there is no hardware to feed, so just poke userland
 * if packets are queued.
 */
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}
1050 
1051 void
1052 tun_link_state(struct ifnet *ifp, int link_state)
1053 {
1054 	if (ifp->if_link_state != link_state) {
1055 		ifp->if_link_state = link_state;
1056 		if_link_state_change(ifp);
1057 	}
1058 }
1059