xref: /openbsd-src/sys/net/if_tun.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: if_tun.c,v 1.185 2019/05/01 06:11:46 dlg Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/fcntl.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 
60 #include <net/if.h>
61 #include <net/if_types.h>
62 #include <net/netisr.h>
63 #include <net/rtable.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/if_ether.h>
67 
68 #ifdef PIPEX
69 #include <net/pipex.h>
70 #endif
71 
72 #include "bpfilter.h"
73 #if NBPFILTER > 0
74 #include <net/bpf.h>
75 #endif
76 
77 #ifdef MPLS
78 #include <netmpls/mpls.h>
79 #endif /* MPLS */
80 
81 #include <net/if_tun.h>
82 
/*
 * Per-instance softc shared by both tun(4) (layer 3) and tap(4)
 * (layer 2) devices; which personality is active is recorded in
 * the TUN_LAYER2 bit of tun_flags.
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	LIST_ENTRY(tun_softc) entry;	/* all tunnel interfaces */
	int		tun_unit;	/* minor number / clone unit */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	pid_t		tun_pgid;	/* the process group - if any */
	u_short		tun_flags;	/* misc flags (TUN_* bits) */
#define tun_if	arpcom.ac_if
#ifdef PIPEX
	struct pipex_iface_context pipex_iface; /* pipex context */
#endif
};
98 
99 #ifdef	TUN_DEBUG
100 int	tundebug = TUN_DEBUG;
101 #define TUNDEBUG(a)	(tundebug? printf a : 0)
102 #else
103 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
104 #endif
105 
106 /* Only these IFF flags are changeable by TUNSIFINFO */
107 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
108 
109 void	tunattach(int);
110 
111 /* cdev functions */
112 int	tunopen(dev_t, int, int, struct proc *);
113 int	tunclose(dev_t, int, int, struct proc *);
114 int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
115 int	tunread(dev_t, struct uio *, int);
116 int	tunwrite(dev_t, struct uio *, int);
117 int	tunpoll(dev_t, int, struct proc *);
118 int	tunkqfilter(dev_t, struct knote *);
119 
120 int	tapopen(dev_t, int, int, struct proc *);
121 int	tapclose(dev_t, int, int, struct proc *);
122 int	tapioctl(dev_t, u_long, caddr_t, int, struct proc *);
123 int	tapread(dev_t, struct uio *, int);
124 int	tapwrite(dev_t, struct uio *, int);
125 int	tappoll(dev_t, int, struct proc *);
126 int	tapkqfilter(dev_t, struct knote *);
127 
128 int	tun_dev_open(struct tun_softc *, int, int, struct proc *);
129 int	tun_dev_close(struct tun_softc *, int, int, struct proc *);
130 int	tun_dev_ioctl(struct tun_softc *, u_long, caddr_t, int, struct proc *);
131 int	tun_dev_read(struct tun_softc *, struct uio *, int);
132 int	tun_dev_write(struct tun_softc *, struct uio *, int);
133 int	tun_dev_poll(struct tun_softc *, int, struct proc *);
134 int	tun_dev_kqfilter(struct tun_softc *, struct knote *);
135 
136 
137 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
138 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
139 	    struct rtentry *);
140 int	tun_clone_create(struct if_clone *, int);
141 int	tap_clone_create(struct if_clone *, int);
142 int	tun_create(struct if_clone *, int, int);
143 int	tun_clone_destroy(struct ifnet *);
144 static inline struct	tun_softc *tun_lookup(int);
145 static inline struct	tun_softc *tap_lookup(int);
146 void	tun_wakeup(struct tun_softc *);
147 int	tun_init(struct tun_softc *);
148 void	tun_start(struct ifnet *);
149 int	filt_tunread(struct knote *, long);
150 int	filt_tunwrite(struct knote *, long);
151 void	filt_tunrdetach(struct knote *);
152 void	filt_tunwdetach(struct knote *);
153 void	tun_link_state(struct tun_softc *);
154 
155 struct filterops tunread_filtops =
156 	{ 1, NULL, filt_tunrdetach, filt_tunread};
157 
158 struct filterops tunwrite_filtops =
159 	{ 1, NULL, filt_tunwdetach, filt_tunwrite};
160 
161 LIST_HEAD(, tun_softc) tun_softc_list;
162 LIST_HEAD(, tun_softc) tap_softc_list;
163 
164 struct if_clone tun_cloner =
165     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
166 
167 struct if_clone tap_cloner =
168     IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
169 
/*
 * Pseudo-device attach routine, called once at boot: initialize the
 * global softc lists and register the "tun" and "tap" cloners.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	LIST_INIT(&tap_softc_list);
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
#ifdef PIPEX
	pipex_init();
#endif
}
181 
/* Cloner entry point for "tun" interfaces: create a layer-3 instance. */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
187 
/* Cloner entry point for "tap" interfaces: create a layer-2 instance. */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
193 
/*
 * Common clone-create: allocate and attach a tun or tap interface.
 * flags is either 0 (layer 3 tun) or TUN_LAYER2 (ethernet-like tap).
 * Interfaces created via the cloner start with TUN_STAYUP so that a
 * close of the character device does not destroy them; tunopen()/
 * tapopen() clear that bit for devices created on demand.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	/* unit must be encodable as a device minor number */
	if (unit > minor(~0U))
		return (ENXIO);

	tp = malloc(sizeof(*tp), M_DEVBUF, M_WAITOK|M_ZERO);
	tp->tun_unit = unit;
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_softc = tp;

	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel carrying an AF header word */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
		LIST_INSERT_HEAD(&tun_softc_list, tp, entry);
	} else {
		/* layer 2: fake ethernet with a generated MAC address */
		tp->tun_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
		ifp->if_capabilities = IFCAP_VLAN_MTU;

		if_attach(ifp);
		ether_ifattach(ifp);

		LIST_INSERT_HEAD(&tap_softc_list, tp, entry);
	}

#ifdef PIPEX
	if ((tp->tun_flags & TUN_LAYER2) == 0)
		pipex_iface_init(&tp->pipex_iface, ifp);
#endif

	return (0);
}
253 
/*
 * Clone-destroy: tear down a tun or tap interface.  Wake any sleeping
 * readers and invalidate outstanding knotes before unlinking so that
 * no one can still reference the softc once it is freed.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

#ifdef PIPEX
	if ((tp->tun_flags & TUN_LAYER2) == 0)
		pipex_iface_fini(&tp->pipex_iface);
#endif
	tun_wakeup(tp);

	/* detach kqueue users while blocking interrupts */
	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	LIST_REMOVE(tp, entry);

	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF, sizeof *tp);
	return (0);
}
281 
282 static inline struct tun_softc *
283 tun_lookup(int unit)
284 {
285 	struct tun_softc *tp;
286 
287 	LIST_FOREACH(tp, &tun_softc_list, entry)
288 		if (tp->tun_unit == unit)
289 			return (tp);
290 	return (NULL);
291 }
292 
293 static inline struct tun_softc *
294 tap_lookup(int unit)
295 {
296 	struct tun_softc *tp;
297 
298 	LIST_FOREACH(tp, &tap_softc_list, entry)
299 		if (tp->tun_unit == unit)
300 			return (tp);
301 	return (NULL);
302 }
303 
304 /*
305  * tunnel open - must be superuser & the device must be
306  * configured in
307  */
/*
 * cdev open for /dev/tunN.  If the interface does not exist yet it is
 * cloned on demand in the opener's routing domain; such on-demand
 * interfaces lose TUN_STAYUP so they are destroyed again on close.
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc *tp;
	unsigned int rdomain = rtable_l2(p->p_p->ps_rtableid);

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];
		int	error;

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		NET_LOCK();
		error = if_clone_create(xname, rdomain);
		NET_UNLOCK();
		if (error != 0)
			return (error);

		/* re-lookup: the clone may have raced with a destroy */
		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		tp->tun_flags &= ~TUN_STAYUP;
	}

	return (tun_dev_open(tp, flag, mode, p));
}
332 
/*
 * cdev open for /dev/tapN; same on-demand clone logic as tunopen()
 * but against the tap list.
 */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc *tp;
	unsigned int rdomain = rtable_l2(p->p_p->ps_rtableid);

	if ((tp = tap_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];
		int	error;

		snprintf(xname, sizeof(xname), "%s%d", "tap", minor(dev));
		NET_LOCK();
		error = if_clone_create(xname, rdomain);
		NET_UNLOCK();
		if (error != 0)
			return (error);

		/* re-lookup: the clone may have raced with a destroy */
		if ((tp = tap_lookup(minor(dev))) == NULL)
			return (ENXIO);
		tp->tun_flags &= ~TUN_STAYUP;
	}

	return (tun_dev_open(tp, flag, mode, p));
}
357 
358 int
359 tun_dev_open(struct tun_softc *tp, int flag, int mode, struct proc *p)
360 {
361 	struct ifnet *ifp;
362 
363 	if (tp->tun_flags & TUN_OPEN)
364 		return (EBUSY);
365 
366 	ifp = &tp->tun_if;
367 	tp->tun_flags |= TUN_OPEN;
368 	if (flag & FNONBLOCK)
369 		tp->tun_flags |= TUN_NBIO;
370 
371 	/* automatically mark the interface running on open */
372 	ifp->if_flags |= IFF_RUNNING;
373 	tun_link_state(tp);
374 
375 	TUNDEBUG(("%s: open\n", ifp->if_xname));
376 	return (0);
377 }
378 
379 /*
380  * tunclose - close the device; if closing the real device, flush pending
381  *  output and unless STAYUP bring down and destroy the interface.
382  */
/* cdev close for /dev/tunN: resolve the softc and defer to the common path. */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_close(tp, flag, mode, p));
}
392 
/* cdev close for /dev/tapN: resolve the softc and defer to the common path. */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_close(tp, flag, mode, p));
}
402 
/*
 * Common close path: drop the open/nonblock/async state, purge pending
 * output and either destroy the interface (on-demand clones) or, when
 * TUN_STAYUP is set, keep it around and just wake up readers.
 */
int
tun_dev_close(struct tun_softc *tp, int flag, int mode, struct proc *p)
{
	int			 error = 0;
	struct ifnet		*ifp;

	ifp = &tp->tun_if;
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);

	/*
	 * junk all pending output
	 */
	ifp->if_flags &= ~IFF_RUNNING;
	tun_link_state(tp);
	IFQ_PURGE(&ifp->if_snd);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	if (!(tp->tun_flags & TUN_STAYUP)) {
		/* on-demand device: tear the interface down with the close */
		NET_LOCK();
		error = if_clone_destroy(ifp->if_xname);
		NET_UNLOCK();
	} else {
		/* keep the interface; forget the SIGIO recipient */
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
	}

	return (error);
}
432 
/*
 * Bring the interface up and recompute the address-state flags
 * (TUN_IASET, TUN_DSTADDR, TUN_BRDADDR) from the current address
 * list.  Called from the SIOCSIFADDR/SIOCSIFDSTADDR ioctl paths.
 */
int
tun_init(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from a clean slate and re-derive the flags below */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
487 
488 /*
489  * Process an ioctl request.
490  */
/*
 * Network-interface ioctl handler (ifp->if_ioctl).  Layer-2 instances
 * fall back to ether_ioctl() for anything not handled here.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		tun_init(tp);
		break;
	case SIOCSIFDSTADDR:
		tun_init(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		/* accept anything between the ethernet minimum and TUNMRU */
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	case SIOCSIFFLAGS:
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}
526 
527 /*
528  * tun_output - queue packets from higher level ready to put out.
529  */
/*
 * tun_output - queue packets from higher level ready to put out.
 * For layer-3 instances a 4-byte address-family header is prepended
 * before the packet is queued for the reading process; layer-2
 * instances defer to ether_output().
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the address family so the reader can demux */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif
#ifdef PIPEX
	/* pipex may consume the packet entirely; NULL means it did */
	if (pipex_enable && (m0 = pipex_output(m0, dst->sa_family,
	    sizeof(u_int32_t), &tp->pipex_iface)) == NULL) {
		return (0);
	}
#endif

	error = if_enqueue(ifp, m0);

	if (error) {
		ifp->if_collisions++;
		return (error);
	}

	/* notify the sleeping/selecting reader that data is available */
	tun_wakeup(tp);
	return (0);
}
582 
/*
 * Wake up anyone waiting to read from the device: sleeping readers,
 * SIGIO recipients (FIOASYNC) and select/poll/kqueue waiters.
 */
void
tun_wakeup(struct tun_softc *tp)
{
	KERNEL_LOCK();
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)tp);
	}
	/* signal the process group registered via TIOCSPGRP/FIOASYNC */
	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
		csignal(tp->tun_pgid, SIGIO,
		    tp->tun_siguid, tp->tun_sigeuid);
	selwakeup(&tp->tun_rsel);
	KERNEL_UNLOCK();
}
597 
598 /*
599  * the cdevsw interface is now pretty minimal.
600  */
/* cdev ioctl for /dev/tunN: resolve the softc and defer to the common path. */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	struct tun_softc *tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_ioctl(tp, cmd, data, flag, p));
}
610 
/* cdev ioctl for /dev/tapN: resolve the softc and defer to the common path. */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	struct tun_softc *tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_ioctl(tp, cmd, data, flag, p));
}
620 
/*
 * Common character-device ioctl handler: tun-specific configuration
 * (TUNSIFINFO and friends), generic fd semantics (FIONBIO, FIOASYNC,
 * FIONREAD, TIOC[SG]PGRP) and, for tap, MAC address get/set.  Unknown
 * commands on layer-3 instances are offered to pipex before failing.
 */
int
tun_dev_ioctl(struct tun_softc *tp, u_long cmd, caddr_t data, int flag,
    struct proc *p)
{
	struct tuninfo		*tunp;

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU)
			return (EINVAL);
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only the TUN_IFF_FLAGS subset may be changed this way */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be given */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* bytes in the head packet of the send queue, if any */
		*(int *)data = ifq_hdatalen(&tp->tun_if.if_snd);
		break;
	case TIOCSPGRP:
		/* remember who to SIGIO, and with which credentials */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_ucred->cr_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case SIOCGIFADDR:
		/* MAC address read-back only makes sense for tap */
		if (!(tp->tun_flags & TUN_LAYER2))
			return (EINVAL);
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2))
			return (EINVAL);
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
#ifdef PIPEX
		if (!(tp->tun_flags & TUN_LAYER2)) {
			int ret;
			ret = pipex_ioctl(&tp->pipex_iface, cmd, data);
			return (ret);
		}
#endif
		return (ENOTTY);
	}
	return (0);
}
714 
715 /*
716  * The cdevsw read interface - reads a packet at a time, or at
717  * least as much of a packet as can be read.
718  */
/* cdev read for /dev/tunN: resolve the softc and defer to the common path. */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_read(tp, uio, ioflag));
}
728 
/* cdev read for /dev/tapN: resolve the softc and defer to the common path. */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_read(tp, uio, ioflag));
}
738 
/*
 * Common read path: dequeue one packet from the interface send queue
 * and copy as much of it as fits into the caller's buffer; any
 * remainder of the packet is dropped.  Blocks until a packet arrives
 * unless the device is in non-blocking mode.  After every tsleep()
 * the interface index is re-checked because the interface may have
 * been destroyed while we slept.
 */
int
tun_dev_read(struct tun_softc *tp, struct uio *uio, int ioflag)
{
	struct ifnet		*ifp = &tp->tun_if;
	struct mbuf		*m, *m0;
	unsigned int		 ifidx;
	int			 error = 0;
	size_t			 len;

	if ((tp->tun_flags & TUN_READY) != TUN_READY)
		return (EHOSTDOWN);

	ifidx = ifp->if_index;
	tp->tun_flags &= ~TUN_RWAIT;

	do {
		struct ifnet *ifp1;
		int destroyed;

		/* wait for the interface to become up and running again */
		while ((tp->tun_flags & TUN_READY) != TUN_READY) {
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0)
				return (error);
			/* Make sure the interface still exists. */
			ifp1 = if_get(ifidx);
			destroyed = (ifp1 == NULL);
			if_put(ifp1);
			if (destroyed)
				return (ENXIO);
		}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY)
				return (EWOULDBLOCK);
			/* sleep until tun_wakeup() signals new output */
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0)
				return (error);
			/* Make sure the interface still exists. */
			ifp1 = if_get(ifidx);
			destroyed = (ifp1 == NULL);
			if_put(ifp1);
			if (destroyed)
				return (ENXIO);
		}
	} while (m0 == NULL);

	if (tp->tun_flags & TUN_LAYER2) {
#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif
	}

	/* copy out mbuf by mbuf, freeing each one as it is consumed */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = ulmin(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		m = m_free(m0);
		m0 = m;
	}

	/* reads are packet-atomic: discard whatever did not fit */
	if (m0 != NULL) {
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_oerrors++;

	return (error);
}
810 
811 /*
812  * the cdevsw write interface - an atomic write is a packet - or else!
813  */
/* cdev write for /dev/tunN: resolve the softc and defer to the common path. */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_write(tp, uio, ioflag));
}
823 
/* cdev write for /dev/tapN: resolve the softc and defer to the common path. */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_write(tp, uio, ioflag));
}
833 
834 int
835 tun_dev_write(struct tun_softc *tp, struct uio *uio, int ioflag)
836 {
837 	struct ifnet		*ifp;
838 	u_int32_t		*th;
839 	struct mbuf		*top, **mp, *m;
840 	int			error = 0, tlen;
841 	size_t			mlen;
842 
843 	ifp = &tp->tun_if;
844 	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));
845 
846 	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
847 	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
848 		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
849 		return (EMSGSIZE);
850 	}
851 	tlen = uio->uio_resid;
852 
853 	/* get a header mbuf */
854 	MGETHDR(m, M_DONTWAIT, MT_DATA);
855 	if (m == NULL)
856 		return (ENOBUFS);
857 	mlen = MHLEN;
858 	if (uio->uio_resid >= MINCLSIZE) {
859 		MCLGET(m, M_DONTWAIT);
860 		if (!(m->m_flags & M_EXT)) {
861 			m_free(m);
862 			return (ENOBUFS);
863 		}
864 		mlen = MCLBYTES;
865 	}
866 
867 	top = NULL;
868 	mp = &top;
869 	if (tp->tun_flags & TUN_LAYER2) {
870 		/*
871 		 * Pad so that IP header is correctly aligned
872 		 * this is necessary for all strict aligned architectures.
873 		 */
874 		mlen -= ETHER_ALIGN;
875 		m->m_data += ETHER_ALIGN;
876 	}
877 	while (error == 0 && uio->uio_resid > 0) {
878 		m->m_len = ulmin(mlen, uio->uio_resid);
879 		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
880 		*mp = m;
881 		mp = &m->m_next;
882 		if (error == 0 && uio->uio_resid > 0) {
883 			MGET(m, M_DONTWAIT, MT_DATA);
884 			if (m == NULL) {
885 				error = ENOBUFS;
886 				break;
887 			}
888 			mlen = MLEN;
889 			if (uio->uio_resid >= MINCLSIZE) {
890 				MCLGET(m, M_DONTWAIT);
891 				if (!(m->m_flags & M_EXT)) {
892 					error = ENOBUFS;
893 					m_free(m);
894 					break;
895 				}
896 				mlen = MCLBYTES;
897 			}
898 		}
899 	}
900 	if (error) {
901 		m_freem(top);
902 		ifp->if_ierrors++;
903 		return (error);
904 	}
905 
906 	top->m_pkthdr.len = tlen;
907 
908 	if (tp->tun_flags & TUN_LAYER2) {
909 		struct mbuf_list ml = MBUF_LIST_INITIALIZER();
910 
911 		ml_enqueue(&ml, top);
912 		if_input(ifp, &ml);
913 		return (0);
914 	}
915 
916 #if NBPFILTER > 0
917 	if (ifp->if_bpf) {
918 		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
919 	}
920 #endif
921 
922 	th = mtod(top, u_int32_t *);
923 	/* strip the tunnel header */
924 	top->m_data += sizeof(*th);
925 	top->m_len  -= sizeof(*th);
926 	top->m_pkthdr.len -= sizeof(*th);
927 	top->m_pkthdr.ph_rtableid = ifp->if_rdomain;
928 	top->m_pkthdr.ph_ifidx = ifp->if_index;
929 
930 	ifp->if_ipackets++;
931 	ifp->if_ibytes += top->m_pkthdr.len;
932 
933 	NET_LOCK();
934 
935 	switch (ntohl(*th)) {
936 	case AF_INET:
937 		ipv4_input(ifp, top);
938 		break;
939 #ifdef INET6
940 	case AF_INET6:
941 		ipv6_input(ifp, top);
942 		break;
943 #endif
944 #ifdef MPLS
945 	case AF_MPLS:
946 		mpls_input(ifp, top);
947 		break;
948 #endif
949 	default:
950 		m_freem(top);
951 		error = EAFNOSUPPORT;
952 		break;
953 	}
954 
955 	NET_UNLOCK();
956 
957 	return (error);
958 }
959 
960 /*
961  * tunpoll - the poll interface, this is only useful on reads
962  * really. The write detect always returns true, write never blocks
963  * anyway, it either accepts the packet or drops it.
964  */
/* cdev poll for /dev/tunN: resolve the softc and defer to the common path. */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	struct tun_softc *tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (POLLERR);
	return (tun_dev_poll(tp, events, p));
}
974 
/* cdev poll for /dev/tapN: resolve the softc and defer to the common path. */
int
tappoll(dev_t dev, int events, struct proc *p)
{
	struct tun_softc *tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (POLLERR);
	return (tun_dev_poll(tp, events, p));
}
984 
/*
 * Common poll path: readable when the interface send queue holds at
 * least one packet, otherwise record the waiter; always writable
 * (writes either accept the packet or drop it, they never block).
 */
int
tun_dev_poll(struct tun_softc *tp, int events, struct proc *p)
{
	int			 revents;
	struct ifnet		*ifp;
	unsigned int		 len;

	ifp = &tp->tun_if;
	revents = 0;
	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));

	if (events & (POLLIN | POLLRDNORM)) {
		len = IFQ_LEN(&ifp->if_snd);
		if (len > 0) {
			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname, len));
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
			selrecord(p, &tp->tun_rsel);
		}
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);
	return (revents);
}
1010 
1011 /*
1012  * kqueue(2) support.
1013  *
1014  * The tun driver uses an array of tun_softc's based on the minor number
1015  * of the device.  kn->kn_hook gets set to the specific tun_softc.
1016  *
1017  * filt_tunread() sets kn->kn_data to the iface qsize
1018  * filt_tunwrite() sets kn->kn_data to the MTU size
1019  */
/* cdev kqfilter for /dev/tunN: resolve the softc and defer to the common path. */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc *tp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_kqfilter(tp, kn));
}
1029 
/* cdev kqfilter for /dev/tapN: resolve the softc and defer to the common path. */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc *tp;

	if ((tp = tap_lookup(minor(dev))) == NULL)
		return (ENXIO);
	return (tun_dev_kqfilter(tp, kn));
}
1039 
1040 int
1041 tun_dev_kqfilter(struct tun_softc *tp, struct knote *kn)
1042 {
1043 	int			 s;
1044 	struct klist		*klist;
1045 	struct ifnet		*ifp;
1046 
1047 	ifp = &tp->tun_if;
1048 	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
1049 
1050 	switch (kn->kn_filter) {
1051 		case EVFILT_READ:
1052 			klist = &tp->tun_rsel.si_note;
1053 			kn->kn_fop = &tunread_filtops;
1054 			break;
1055 		case EVFILT_WRITE:
1056 			klist = &tp->tun_wsel.si_note;
1057 			kn->kn_fop = &tunwrite_filtops;
1058 			break;
1059 		default:
1060 			return (EINVAL);
1061 	}
1062 
1063 	kn->kn_hook = (caddr_t)tp;
1064 
1065 	s = splhigh();
1066 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1067 	splx(s);
1068 
1069 	return (0);
1070 }
1071 
/* Detach a read knote from the device's read klist (unless already detached). */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1084 
/*
 * kqueue read filter: ready when the interface send queue is non-empty;
 * kn_data reports the number of queued packets.  A detached knote
 * (interface destroyed) reports ready with no data so the consumer
 * can observe EOF-like behavior.
 */
int
filt_tunread(struct knote *kn, long hint)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	unsigned int		 len;

	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	len = IFQ_LEN(&ifp->if_snd);
	if (len > 0) {
		kn->kn_data = len;

		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
		    IFQ_LEN(&ifp->if_snd)));
		return (1);
	}
	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
	return (0);
}
1111 
/* Detach a write knote from the device's write klist (unless already detached). */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1124 
/*
 * kqueue write filter: the device is always writable; kn_data reports
 * the interface MTU as the writable size.
 */
int
filt_tunwrite(struct knote *kn, long hint)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	kn->kn_data = ifp->if_mtu;

	return (1);
}
1143 
/*
 * Interface start routine (ifp->if_start): packets are not transmitted
 * by the kernel, they are read by userland, so just wake the reader
 * whenever the send queue is non-empty.
 */
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;

	splassert(IPL_NET);

	if (IFQ_LEN(&ifp->if_snd))
		tun_wakeup(tp);
}
1154 
1155 void
1156 tun_link_state(struct tun_softc *tp)
1157 {
1158 	struct ifnet *ifp = &tp->tun_if;
1159 	int link_state = LINK_STATE_DOWN;
1160 
1161 	if (tp->tun_flags & TUN_OPEN) {
1162 		if (tp->tun_flags & TUN_LAYER2)
1163 			link_state = LINK_STATE_FULL_DUPLEX;
1164 		else
1165 			link_state = LINK_STATE_UP;
1166 	}
1167 	if (ifp->if_link_state != link_state) {
1168 		ifp->if_link_state = link_state;
1169 		if_link_state_change(ifp);
1170 	}
1171 }
1172