xref: /openbsd-src/sys/net/if_tun.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: if_tun.c,v 1.236 2022/02/26 02:15:45 dlg Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/sigio.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/fcntl.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 #include <sys/smr.h>
59 
60 #include <net/if.h>
61 #include <net/if_types.h>
62 #include <net/netisr.h>
63 #include <net/rtable.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/if_ether.h>
67 
68 #include "bpfilter.h"
69 #if NBPFILTER > 0
70 #include <net/bpf.h>
71 #endif
72 
73 #ifdef MPLS
74 #include <netmpls/mpls.h>
75 #endif /* MPLS */
76 
77 #include <net/if_tun.h>
78 
/*
 * Per-instance softc shared by tun(4) (layer 3) and tap(4) (layer 2).
 * Instances live on tun_devs_list and are looked up under SMR from the
 * cdevsw entrypoints; sc_refs counts entrypoints currently inside.
 */
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct selinfo		sc_rsel;	/* read select */
	struct selinfo		sc_wsel;	/* write select (not used) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)	/* destroy started; reject new work */

	dev_t			sc_dev;		/* device that has us open, 0 if closed */
	struct refcnt		sc_refs;	/* entrypoints currently using this sc */
	unsigned int		sc_reading;	/* read(2) is sleeping on if_snd */
};
95 
96 #ifdef	TUN_DEBUG
97 int	tundebug = TUN_DEBUG;
98 #define TUNDEBUG(a)	(tundebug? printf a : 0)
99 #else
100 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
101 #endif
102 
103 /* Only these IFF flags are changeable by TUNSIFINFO */
104 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
105 
106 void	tunattach(int);
107 
108 int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
109 int	tun_dev_close(dev_t, struct proc *);
110 int	tun_dev_ioctl(dev_t, u_long, void *);
111 int	tun_dev_read(dev_t, struct uio *, int);
112 int	tun_dev_write(dev_t, struct uio *, int, int);
113 int	tun_dev_poll(dev_t, int, struct proc *);
114 int	tun_dev_kqfilter(dev_t, struct knote *);
115 
116 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
117 void	tun_input(struct ifnet *, struct mbuf *);
118 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
119 	    struct rtentry *);
120 int	tun_enqueue(struct ifnet *, struct mbuf *);
121 int	tun_clone_create(struct if_clone *, int);
122 int	tap_clone_create(struct if_clone *, int);
123 int	tun_create(struct if_clone *, int, int);
124 int	tun_clone_destroy(struct ifnet *);
125 void	tun_wakeup(struct tun_softc *);
126 int	tun_init(struct tun_softc *);
127 void	tun_start(struct ifnet *);
128 int	filt_tunread(struct knote *, long);
129 int	filt_tunwrite(struct knote *, long);
130 void	filt_tunrdetach(struct knote *);
131 void	filt_tunwdetach(struct knote *);
132 void	tun_link_state(struct ifnet *, int);
133 
/* kqueue EVFILT_READ ops: fires when the send queue has data for userland */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
};
140 
/* kqueue EVFILT_WRITE ops: the device is always writable */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
};
147 
148 SMR_LIST_HEAD(tun_list, tun_softc);
149 
150 struct if_clone tun_cloner =
151     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
152 
153 struct if_clone tap_cloner =
154     IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
155 
/*
 * tunattach - pseudo-device attach hook; registers both the tun and
 * tap interface cloners.  The argument (instance count) is unused.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}
162 
/* if_clone create hook for tun(4): a layer 3 point-to-point interface */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
168 
/* if_clone create hook for tap(4): a layer 2 ethernet interface */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
174 
175 struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
176 
177 struct tun_softc *
178 tun_name_lookup(const char *name)
179 {
180 	struct tun_softc *sc;
181 
182 	KERNEL_ASSERT_LOCKED();
183 
184 	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
185 		if (strcmp(sc->sc_if.if_xname, name) == 0)
186 			return (sc);
187 	}
188 
189 	return (NULL);
190 }
191 
192 int
193 tun_insert(struct tun_softc *sc)
194 {
195 	int error = 0;
196 
197 	/* check for a race */
198 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
199 		error = EEXIST;
200 	else {
201 		/* tun_name_lookup checks for the right lock already */
202 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
203 	}
204 
205 	return (error);
206 }
207 
/*
 * tun_create - common clone constructor for tun(4) and tap(4).
 * Allocates the softc, publishes it on tun_devs_list (so tun_dev_open
 * can find it), then attaches the ifnet as either a point-to-point
 * layer 3 tunnel or a fake ethernet, and finally marks it TUN_INITED
 * so any open(2) sleeping in tun_dev_open may proceed.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	/* the unit must be representable as a device minor number */
	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel carrying AF-tagged packets */
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		/* packets are prefixed with a 4 byte address family */
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: behave like an ethernet with a made-up MAC */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	/* lost a naming race; nothing else was initialised yet */
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
283 
/*
 * tun_clone_destroy - tear an instance down.  Marks the softc dead,
 * revokes any vnode userland has open on it, unpublishes it from the
 * SMR list, then waits for all in-flight device entrypoints to drain
 * before detaching the ifnet and freeing the memory.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;
	int			 s;

	KERNEL_ASSERT_LOCKED();

	/* only one destroy may proceed; later ones see TUN_DEAD */
	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		/* the revoke closes the device, which clears sc_dev */
		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	s = splhigh();
	klist_invalidate(&sc->sc_rsel.si_note);
	klist_invalidate(&sc->sc_wsel.si_note);
	splx(s);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}
333 
/*
 * tun_get - map a dev_t to its softc and take a reference on it,
 * under an SMR read section so it is safe against a concurrent
 * tun_clone_destroy.  Returns NULL if no open instance owns dev.
 * The caller must release the reference with tun_put().
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			/* pin it before leaving the read section */
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}
350 
/* tun_put - drop a tun_get() reference; wakes a waiting destructor */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
356 
/* cdevsw open for /dev/tunN */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}
362 
/* cdevsw open for /dev/tapN */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
368 
/*
 * tun_dev_open - common open(2) path.  Finds (or clones) the interface
 * named after the device minor, waits for it to finish initialising,
 * and claims it for this dev_t.  An interface auto-created here is
 * marked non-STAYUP so the final close destroys it again.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		/* tun_create wakes this channel once TUN_INITED is set */
		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}
	/* it's ours now */
	sc->sc_dev = dev;
	/* if we auto-cloned it, clear STAYUP so last close destroys it */
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}
452 
453 /*
454  * tunclose - close the device; if closing the real device, flush pending
455  *  output and unless STAYUP bring down and destroy the interface.
456  */
/* cdevsw close for /dev/tunN */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
462 
/* cdevsw close for /dev/tapN */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
468 
/*
 * tun_dev_close - common close(2) path.  Downs the interface, flushes
 * queued packets and async state, releases ownership of the dev_t, and
 * (unless the interface was created by ifconfig, i.e. TUN_STAYUP)
 * destroys the clone after dropping our reference.
 */
int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 error = 0;
	char			 name[IFNAMSIZ];
	int			 destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	CLR(sc->sc_flags, TUN_ASYNC);
	selwakeup(&sc->sc_rsel);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			/* remember the name; sc may be gone after tun_put */
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	/* give up ownership so a new open can claim the unit */
	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}
515 
/*
 * tun_init - recompute the address-derived softc flags (TUN_IASET,
 * TUN_DSTADDR, TUN_BRDADDR) from the interface's current address list
 * and mark the interface up and running.  Called on address ioctls.
 */
int
tun_init(struct tun_softc *sc)
{
	struct ifnet	*ifp = &sc->sc_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from scratch and re-derive from the address list */
	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_BRDADDR;
			} else
				sc->sc_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
570 
571 /*
572  * Process an ioctl request.
573  */
/*
 * tun_ioctl - network-stack side ioctl handler for the ifnet.
 * Handles address/flag/MTU changes; anything else is passed to
 * ether_ioctl for tap(4) interfaces or rejected for tun(4).
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		tun_init(sc);
		break;
	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in step with the admin IFF_UP state */
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFDSTADDR:
		tun_init(sc);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}
614 
615 /*
616  * tun_output - queue packets from higher level ready to put out.
617  */
618 int
619 tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
620     struct rtentry *rt)
621 {
622 	u_int32_t		*af;
623 
624 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
625 		m_freem(m0);
626 		return (EHOSTDOWN);
627 	}
628 
629 	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
630 	if (m0 == NULL)
631 		return (ENOBUFS);
632 	af = mtod(m0, u_int32_t *);
633 	*af = htonl(dst->sa_family);
634 
635 	return (if_enqueue(ifp, m0));
636 }
637 
638 int
639 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
640 {
641 	struct tun_softc	*sc = ifp->if_softc;
642 	int			 error;
643 
644 	error = ifq_enqueue(&ifp->if_snd, m0);
645 	if (error != 0)
646 		return (error);
647 
648 	tun_wakeup(sc);
649 
650 	return (0);
651 }
652 
653 void
654 tun_wakeup(struct tun_softc *sc)
655 {
656 	if (sc->sc_reading)
657 		wakeup(&sc->sc_if.if_snd);
658 
659 	selwakeup(&sc->sc_rsel);
660 	if (sc->sc_flags & TUN_ASYNC)
661 		pgsigio(&sc->sc_sigio, SIGIO, 0);
662 }
663 
664 /*
665  * the cdevsw interface is now pretty minimal.
666  */
/* cdevsw ioctl for /dev/tunN */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
672 
/* cdevsw ioctl for /dev/tapN */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
678 
/*
 * tun_dev_ioctl - device-side ioctl handler shared by tun and tap.
 * Covers the tun-specific TUNS*/TUNG* commands, generic async I/O
 * fcntl-style commands, and (for tap only) MAC address get/set.
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		/* the interface type is fixed at clone time */
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* only TUN_IFF_FLAGS bits may be changed this way */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		/* non-blocking mode is handled per-call via IO_NDELAY */
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* bytes available: the length of the head packet, if any */
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		/* MAC address access only makes sense on tap(4) */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
779 
780 /*
781  * The cdevsw read interface - reads a packet at a time, or at
782  * least as much of a packet as can be read.
783  */
/* cdevsw read for /dev/tunN */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
789 
/* cdevsw read for /dev/tapN */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
795 
/*
 * tun_dev_read - dequeue one packet from the send queue (sleeping if
 * the queue is empty and the open is blocking) and copy as much of it
 * as fits into the caller's buffer.  Anything that does not fit is
 * silently discarded with the rest of the packet.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * sc_reading lets tun_wakeup/tun_clone_destroy know someone may
	 * be asleep on if_snd and needs a wakeup.
	 */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* walk the mbuf chain, copying out until the buffer is full */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
840 
841 /*
842  * the cdevsw write interface - an atomic write is a packet - or else!
843  */
/* cdevsw write for /dev/tunN */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}
849 
/* cdevsw write for /dev/tapN; ETHER_ALIGN keeps the IP header aligned */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
855 
/*
 * tun_dev_write - turn one atomic write(2) into one packet: copy the
 * user buffer into a freshly allocated, suitably aligned mbuf and
 * inject it into the network stack via if_vinput.  `align` is extra
 * leading space requested by the caller (ETHER_ALIGN for tap).
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* a write must be one whole packet: header plus at most hardmtu */
	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		/* too big for the header mbuf alone; add a cluster */
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* size the mbuf for align + data, then trim the alignment pad */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
914 
915 void
916 tun_input(struct ifnet *ifp, struct mbuf *m0)
917 {
918 	uint32_t		af;
919 
920 	KASSERT(m0->m_len >= sizeof(af));
921 
922 	af = *mtod(m0, uint32_t *);
923 	/* strip the tunnel header */
924 	m_adj(m0, sizeof(af));
925 
926 	switch (ntohl(af)) {
927 	case AF_INET:
928 		ipv4_input(ifp, m0);
929 		break;
930 #ifdef INET6
931 	case AF_INET6:
932 		ipv6_input(ifp, m0);
933 		break;
934 #endif
935 #ifdef MPLS
936 	case AF_MPLS:
937 		mpls_input(ifp, m0);
938 		break;
939 #endif
940 	default:
941 		m_freem(m0);
942 		break;
943 	}
944 }
945 
946 /*
947  * tunpoll - the poll interface, this is only useful on reads
948  * really. The write detect always returns true, write never blocks
949  * anyway, it either accepts the packet or drops it.
950  */
/* cdevsw poll for /dev/tunN */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
956 
/* cdevsw poll for /dev/tapN */
int
tappoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
962 
/*
 * tun_dev_poll - readable when the send queue has packets, otherwise
 * record the selector; always writable since writes never block.
 */
int
tun_dev_poll(dev_t dev, int events, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 revents;

	sc = tun_get(dev);
	if (sc == NULL)
		return (POLLERR);

	ifp = &sc->sc_if;
	revents = 0;

	if (events & (POLLIN | POLLRDNORM)) {
		if (!ifq_empty(&ifp->if_snd))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &sc->sc_rsel);
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);

	tun_put(sc);
	return (revents);
}
989 
/* cdevsw kqfilter for /dev/tunN */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
995 
/* cdevsw kqfilter for /dev/tapN */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
1001 
/*
 * tun_dev_kqfilter - attach a knote for EVFILT_READ or EVFILT_WRITE to
 * the instance's corresponding klist.  klist access is serialised by
 * raising to splhigh, matching the detach/invalidate paths.
 */
int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct klist		*klist;
	int			 error = 0;
	int			 s;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rsel.si_note;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wsel.si_note;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = (caddr_t)sc; /* XXX give the sc_ref to the hook? */

	s = splhigh();
	klist_insert_locked(klist, kn);
	splx(s);

put:
	tun_put(sc);
	return (error);
}
1041 
1042 void
1043 filt_tunrdetach(struct knote *kn)
1044 {
1045 	int			 s;
1046 	struct tun_softc	*sc = kn->kn_hook;
1047 
1048 	s = splhigh();
1049 	klist_remove_locked(&sc->sc_rsel.si_note, kn);
1050 	splx(s);
1051 }
1052 
1053 int
1054 filt_tunread(struct knote *kn, long hint)
1055 {
1056 	struct tun_softc	*sc = kn->kn_hook;
1057 	struct ifnet		*ifp = &sc->sc_if;
1058 
1059 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1060 
1061 	return (kn->kn_data > 0);
1062 }
1063 
1064 void
1065 filt_tunwdetach(struct knote *kn)
1066 {
1067 	int			 s;
1068 	struct tun_softc	*sc = kn->kn_hook;
1069 
1070 	s = splhigh();
1071 	klist_remove_locked(&sc->sc_wsel.si_note, kn);
1072 	splx(s);
1073 }
1074 
1075 int
1076 filt_tunwrite(struct knote *kn, long hint)
1077 {
1078 	struct tun_softc	*sc = kn->kn_hook;
1079 	struct ifnet		*ifp = &sc->sc_if;
1080 
1081 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1082 
1083 	return (1);
1084 }
1085 
1086 void
1087 tun_start(struct ifnet *ifp)
1088 {
1089 	struct tun_softc	*sc = ifp->if_softc;
1090 
1091 	splassert(IPL_NET);
1092 
1093 	if (ifq_len(&ifp->if_snd))
1094 		tun_wakeup(sc);
1095 }
1096 
1097 void
1098 tun_link_state(struct ifnet *ifp, int link_state)
1099 {
1100 	if (ifp->if_link_state != link_state) {
1101 		ifp->if_link_state = link_state;
1102 		if_link_state_change(ifp);
1103 	}
1104 }
1105