xref: /openbsd-src/sys/net/if_tun.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: if_tun.c,v 1.169 2016/09/04 15:46:39 reyk Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 
60 #include <net/if.h>
61 #include <net/if_types.h>
62 #include <net/netisr.h>
63 #include <net/rtable.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/if_ether.h>
67 
68 #ifdef PIPEX
69 #include <net/pipex.h>
70 #endif
71 
72 #include "bpfilter.h"
73 #if NBPFILTER > 0
74 #include <net/bpf.h>
75 #endif
76 
77 #include <net/if_tun.h>
78 
/*
 * Per-interface state shared by tun(4) (layer 3) and tap(4) (layer 2).
 * The embedded arpcom carries the ifnet; tun_if aliases arpcom.ac_if.
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	LIST_ENTRY(tun_softc) entry;	/* all tunnel interfaces */
	int		tun_unit;	/* unit == minor device number */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	pid_t		tun_pgid;	/* the process group - if any */
	u_short		tun_flags;	/* misc flags */
#define tun_if	arpcom.ac_if
#ifdef PIPEX
	struct pipex_iface_context pipex_iface; /* pipex context */
#endif
};
94 
95 #ifdef	TUN_DEBUG
96 int	tundebug = TUN_DEBUG;
97 #define TUNDEBUG(a)	(tundebug? printf a : 0)
98 #else
99 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
100 #endif
101 
102 /* Only these IFF flags are changeable by TUNSIFINFO */
103 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
104 
105 void	tunattach(int);
106 
107 /* cdev functions */
108 int	tunopen(dev_t, int, int, struct proc *);
109 int	tunclose(dev_t, int, int, struct proc *);
110 int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
111 int	tunread(dev_t, struct uio *, int);
112 int	tunwrite(dev_t, struct uio *, int);
113 int	tunpoll(dev_t, int, struct proc *);
114 int	tunkqfilter(dev_t, struct knote *);
115 
116 int	tapopen(dev_t, int, int, struct proc *);
117 int	tapclose(dev_t, int, int, struct proc *);
118 int	tapioctl(dev_t, u_long, caddr_t, int, struct proc *);
119 int	tapread(dev_t, struct uio *, int);
120 int	tapwrite(dev_t, struct uio *, int);
121 int	tappoll(dev_t, int, struct proc *);
122 int	tapkqfilter(dev_t, struct knote *);
123 
124 int	tun_dev_open(struct tun_softc *, int, int, struct proc *);
125 int	tun_dev_close(struct tun_softc *, int, int, struct proc *);
126 int	tun_dev_ioctl(struct tun_softc *, u_long, caddr_t, int, struct proc *);
127 int	tun_dev_read(struct tun_softc *, struct uio *, int);
128 int	tun_dev_write(struct tun_softc *, struct uio *, int);
129 int	tun_dev_poll(struct tun_softc *, int, struct proc *);
130 int	tun_dev_kqfilter(struct tun_softc *, struct knote *);
131 
132 
133 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
134 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
135 	    struct rtentry *);
136 int	tun_clone_create(struct if_clone *, int);
137 int	tap_clone_create(struct if_clone *, int);
138 int	tun_create(struct if_clone *, int, int);
139 int	tun_clone_destroy(struct ifnet *);
140 static inline struct	tun_softc *tun_lookup(int);
141 static inline struct	tun_softc *tap_lookup(int);
142 void	tun_wakeup(struct tun_softc *);
143 int	tun_init(struct tun_softc *);
144 void	tun_start(struct ifnet *);
145 int	filt_tunread(struct knote *, long);
146 int	filt_tunwrite(struct knote *, long);
147 void	filt_tunrdetach(struct knote *);
148 void	filt_tunwdetach(struct knote *);
149 void	tun_link_state(struct tun_softc *);
150 
151 struct filterops tunread_filtops =
152 	{ 1, NULL, filt_tunrdetach, filt_tunread};
153 
154 struct filterops tunwrite_filtops =
155 	{ 1, NULL, filt_tunwdetach, filt_tunwrite};
156 
157 LIST_HEAD(, tun_softc) tun_softc_list;
158 LIST_HEAD(, tun_softc) tap_softc_list;
159 
160 struct if_clone tun_cloner =
161     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
162 
163 struct if_clone tap_cloner =
164     IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
165 
/*
 * tunattach - one-time driver initialization at autoconf time.
 * "n" is the configured device count; it is unused because interfaces
 * are created on demand through interface cloning.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	LIST_INIT(&tap_softc_list);
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
#ifdef PIPEX
	pipex_init();
#endif
}
177 
/* Clone handler for "tun" interfaces: a layer 3 tunnel, no extra flags. */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return tun_create(ifc, unit, 0);
}
183 
184 int
185 tap_clone_create(struct if_clone *ifc, int unit)
186 {
187 	return (tun_create(ifc, unit, TUN_LAYER2));
188 }
189 
/*
 * tun_create - common constructor for tun(4) and tap(4).
 * Allocates the softc, initializes the ifnet, and attaches it either as
 * a point-to-point layer 3 tunnel or as a fake Ethernet device depending
 * on TUN_LAYER2 in "flags".  Returns 0 on success or ENOMEM.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 s;

	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (tp == NULL)
		return (ENOMEM);

	tp->tun_unit = unit;
	/*
	 * STAYUP: interfaces created via cloning (ifconfig) survive device
	 * close; the open-on-demand path in tunopen()/tapopen() clears it.
	 */
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_softc = tp;

	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel carrying an AF_* header */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		/* DLT_LOOP: packets are prefixed with a 32-bit AF header */
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
		s = splnet();
		LIST_INSERT_HEAD(&tun_softc_list, tp, entry);
		splx(s);
	} else {
		/* layer 2: pretend to be an Ethernet interface */
		tp->tun_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
		ifp->if_capabilities = IFCAP_VLAN_MTU;

		if_attach(ifp);
		ether_ifattach(ifp);

		s = splnet();
		LIST_INSERT_HEAD(&tap_softc_list, tp, entry);
		splx(s);
	}

#ifdef PIPEX
	/* PIPEX only makes sense on the layer 3 variant */
	if ((tp->tun_flags & TUN_LAYER2) == 0)
		pipex_iface_init(&tp->pipex_iface, ifp);
#endif

	return (0);
}
254 
255 int
256 tun_clone_destroy(struct ifnet *ifp)
257 {
258 	struct tun_softc	*tp = ifp->if_softc;
259 	int			 s;
260 
261 #ifdef PIPEX
262 	if ((tp->tun_flags & TUN_LAYER2) == 0)
263 		pipex_iface_fini(&tp->pipex_iface);
264 #endif
265 	tun_wakeup(tp);
266 
267 	s = splhigh();
268 	klist_invalidate(&tp->tun_rsel.si_note);
269 	klist_invalidate(&tp->tun_wsel.si_note);
270 	splx(s);
271 
272 	s = splnet();
273 	LIST_REMOVE(tp, entry);
274 	splx(s);
275 
276 	if (tp->tun_flags & TUN_LAYER2)
277 		ether_ifdetach(ifp);
278 
279 	if_detach(ifp);
280 
281 	free(tp, M_DEVBUF, 0);
282 	return (0);
283 }
284 
285 static inline struct tun_softc *
286 tun_lookup(int unit)
287 {
288 	struct tun_softc *tp;
289 
290 	LIST_FOREACH(tp, &tun_softc_list, entry)
291 		if (tp->tun_unit == unit)
292 			return (tp);
293 	return (NULL);
294 }
295 
296 static inline struct tun_softc *
297 tap_lookup(int unit)
298 {
299 	struct tun_softc *tp;
300 
301 	LIST_FOREACH(tp, &tap_softc_list, entry)
302 		if (tp->tun_unit == unit)
303 			return (tp);
304 	return (NULL);
305 }
306 
307 /*
308  * tunnel open - must be superuser & the device must be
309  * configured in
310  */
/*
 * tunopen - cdev open for /dev/tunN.  If the matching interface does
 * not exist yet it is created on demand; such an interface loses
 * TUN_STAYUP so it is destroyed again on last close.
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc *tp;
	int error;

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname,
		    rtable_l2(p->p_p->ps_rtableid))) != 0)
			return (error);

		/* clone create succeeded, so the lookup should not fail */
		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		tp->tun_flags &= ~TUN_STAYUP;
	}

	return (tun_dev_open(tp, flag, mode, p));
}
332 
333 int
334 tapopen(dev_t dev, int flag, int mode, struct proc *p)
335 {
336 	struct tun_softc *tp;
337 	int error;
338 
339 	if ((tp = tap_lookup(minor(dev))) == NULL) {	/* create on demand */
340 		char	xname[IFNAMSIZ];
341 
342 		snprintf(xname, sizeof(xname), "%s%d", "tap", minor(dev));
343 		if ((error = if_clone_create(xname,
344 		    rtable_l2(p->p_p->ps_rtableid))) != 0)
345 			return (error);
346 
347 		if ((tp = tap_lookup(minor(dev))) == NULL)
348 			return (ENXIO);
349 		tp->tun_flags &= ~TUN_STAYUP;
350 	}
351 
352 	return (tun_dev_open(tp, flag, mode, p));
353 }
354 
/*
 * tun_dev_open - common open path for tun and tap.
 * The device is exclusive-open (EBUSY if already open); on success the
 * interface is marked running and its link state brought up.
 */
int
tun_dev_open(struct tun_softc *tp, int flag, int mode, struct proc *p)
{
	struct ifnet *ifp;
	int s;

	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;
	if (flag & FNONBLOCK)
		tp->tun_flags |= TUN_NBIO;

	/* automatically mark the interface running on open */
	s = splnet();
	ifp->if_flags |= IFF_RUNNING;
	tun_link_state(tp);
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}
378 
379 /*
380  * tunclose - close the device; if closing the real device, flush pending
381  *  output and unless STAYUP bring down and destroy the interface.
382  */
383 int
384 tunclose(dev_t dev, int flag, int mode, struct proc *p)
385 {
386 	struct tun_softc	*tp;
387 
388 	if ((tp = tun_lookup(minor(dev))) == NULL)
389 		return (ENXIO);
390 	return (tun_dev_close(tp, flag, mode, p));
391 }
392 
393 int
394 tapclose(dev_t dev, int flag, int mode, struct proc *p)
395 {
396 	struct tun_softc	*tp;
397 
398 	if ((tp = tap_lookup(minor(dev))) == NULL)
399 		return (ENXIO);
400 	return (tun_dev_close(tp, flag, mode, p));
401 }
402 
/*
 * tun_dev_close - common close path for tun and tap.
 * Flushes pending output and, unless the interface was created by
 * cloning (TUN_STAYUP), destroys it.  A STAYUP interface merely drops
 * its signal target and wakes poll/kevent waiters.
 */
int
tun_dev_close(struct tun_softc *tp, int flag, int mode, struct proc *p)
{
	int			 s;
	struct ifnet		*ifp;

	ifp = &tp->tun_if;
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);

	/*
	 * junk all pending output
	 */
	s = splnet();
	ifp->if_flags &= ~IFF_RUNNING;
	tun_link_state(tp);
	IFQ_PURGE(&ifp->if_snd);
	splx(s);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	if (!(tp->tun_flags & TUN_STAYUP))
		return (if_clone_destroy(ifp->if_xname));
	else {
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
	}

	return (0);
}
432 
/*
 * tun_init - (re)compute the address-related flag bits.
 * Marks the interface up/running and walks the address list, setting
 * TUN_IASET/TUN_DSTADDR/TUN_BRDADDR according to which IPv4/IPv6
 * addresses are actually configured.  Always returns 0.
 */
int
tun_init(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* start from a clean slate, then re-derive from the address list */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
487 
488 /*
489  * Process an ioctl request.
490  */
/*
 * tun_ioctl - network-stack ioctl handler (ifp->if_ioctl).
 * Handles address/MTU/flag changes; anything else is passed to
 * ether_ioctl() for layer 2 interfaces or rejected with ENOTTY.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		tun_init(tp);
		break;
	case SIOCSIFDSTADDR:
		tun_init(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		/* bound the MTU between Ethernet minimum and TUNMRU */
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	case SIOCSIFFLAGS:
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	splx(s);
	return (error);
}
529 
530 /*
531  * tun_output - queue packets from higher level ready to put out.
532  */
/*
 * tun_output - queue packets from higher level ready to put out.
 * Layer 2 packets are handed to ether_output(); layer 3 packets get a
 * 32-bit address-family header prepended, are optionally intercepted by
 * PIPEX, and are enqueued on if_snd for the reading process.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	/* drop packets until a reader has the device open */
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the address family, in network byte order */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	s = splnet();

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif
#ifdef PIPEX
	/* PIPEX may consume the packet entirely */
	if (pipex_enable && (m0 = pipex_output(m0, dst->sa_family,
	    sizeof(u_int32_t), &tp->pipex_iface)) == NULL) {
		splx(s);
		return (0);
	}
#endif

	error = if_enqueue(ifp, m0);
	splx(s);

	if (error) {
		ifp->if_collisions++;
		return (error);
	}
	ifp->if_opackets++;

	/* notify the reader that a packet is available */
	tun_wakeup(tp);
	return (0);
}
590 
/*
 * tun_wakeup - notify everyone waiting for packets: a reader sleeping
 * in tun_dev_read(), an async (SIGIO) process group if one was set via
 * TIOCSPGRP/FIOASYNC, and poll/kevent waiters.
 */
void
tun_wakeup(struct tun_softc *tp)
{
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)tp);
	}
	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
		csignal(tp->tun_pgid, SIGIO,
		    tp->tun_siguid, tp->tun_sigeuid);
	selwakeup(&tp->tun_rsel);
}
603 
604 /*
605  * the cdevsw interface is now pretty minimal.
606  */
607 int
608 tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
609 {
610 	struct tun_softc *tp;
611 
612 	if ((tp = tun_lookup(minor(dev))) == NULL)
613 		return (ENXIO);
614 	return (tun_dev_ioctl(tp, cmd, data, flag, p));
615 }
616 
617 int
618 tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
619 {
620 	struct tun_softc *tp;
621 
622 	if ((tp = tap_lookup(minor(dev))) == NULL)
623 		return (ENXIO);
624 	return (tun_dev_ioctl(tp, cmd, data, flag, p));
625 }
626 
/*
 * tun_dev_ioctl - common cdev ioctl path for tun and tap.
 * Handles the tun-specific (TUNS*/TUNG*), tty-style (FIO*/TIOC*) and
 * MAC address (SIOC[GS]IFADDR, layer 2 only) commands; anything else
 * is offered to PIPEX for layer 3 interfaces, or fails with ENOTTY.
 * Runs at splnet(); every return path must restore the spl first.
 */
int
tun_dev_ioctl(struct tun_softc *tp, u_long cmd, caddr_t data, int flag,
    struct proc *p)
{
	int			 s;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		/* set MTU/type/flags/baudrate in one call */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only TUN_IFF_FLAGS bits may be changed by userland */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* peek at the next packet's length without dequeueing it */
		m = ifq_deq_begin(&tp->tun_if.if_snd);
		if (m != NULL) {
			*(int *)data = m->m_pkthdr.len;
			ifq_deq_rollback(&tp->tun_if.if_snd, m);
		} else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember the caller's creds for csignal() in tun_wakeup() */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_ucred->cr_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case SIOCGIFADDR:
		/* read the MAC address; only meaningful for tap */
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		/* set the MAC address; only meaningful for tap */
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
#ifdef PIPEX
		if (!(tp->tun_flags & TUN_LAYER2)) {
			int ret;
			ret = pipex_ioctl(&tp->pipex_iface, cmd, data);
			splx(s);
			return (ret);
		}
#endif
		splx(s);
		return (ENOTTY);
	}
	splx(s);
	return (0);
}
738 
739 /*
740  * The cdevsw read interface - reads a packet at a time, or at
741  * least as much of a packet as can be read.
742  */
743 int
744 tunread(dev_t dev, struct uio *uio, int ioflag)
745 {
746 	struct tun_softc *tp;
747 
748 	if ((tp = tun_lookup(minor(dev))) == NULL)
749 		return (ENXIO);
750 	return (tun_dev_read(tp, uio, ioflag));
751 }
752 
753 int
754 tapread(dev_t dev, struct uio *uio, int ioflag)
755 {
756 	struct tun_softc *tp;
757 
758 	if ((tp = tap_lookup(minor(dev))) == NULL)
759 		return (ENXIO);
760 	return (tun_dev_read(tp, uio, ioflag));
761 }
762 
/*
 * tun_dev_read - common cdev read path for tun and tap.
 * Dequeues one packet from if_snd and copies as much of it as fits
 * into the caller's buffer; any remainder of the packet is dropped.
 * Blocks until a packet is available unless TUN_NBIO/IO_NDELAY is
 * set.  After each sleep the interface index is re-checked because
 * the interface may have been destroyed while we slept.
 */
int
tun_dev_read(struct tun_softc *tp, struct uio *uio, int ioflag)
{
	struct ifnet		*ifp = &tp->tun_if;
	struct mbuf		*m, *m0;
	unsigned int		 ifidx;
	int			 error = 0, s;
	size_t			 len;

	if ((tp->tun_flags & TUN_READY) != TUN_READY)
		return (EHOSTDOWN);

	/* remember the index; the softc may be freed while we sleep */
	ifidx = ifp->if_index;
	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	do {
		struct ifnet *ifp1;
		int destroyed;

		while ((tp->tun_flags & TUN_READY) != TUN_READY) {
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
			/* Make sure the interface still exists. */
			ifp1 = if_get(ifidx);
			destroyed = (ifp1 == NULL);
			if_put(ifp1);
			if (destroyed) {
				splx(s);
				return (ENXIO);
			}
		}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			/* queue empty: fail or sleep for tun_wakeup() */
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
			/* Make sure the interface still exists. */
			ifp1 = if_get(ifidx);
			destroyed = (ifp1 == NULL);
			if_put(ifp1);
			if (destroyed) {
				splx(s);
				return (ENXIO);
			}
		}
	} while (m0 == NULL);
	splx(s);

	if (tp->tun_flags & TUN_LAYER2) {
		/* layer 2 output is counted/tapped here, not in tun_output */
#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif
		ifp->if_opackets++;
	}

	/* copy out the chain; stop early on error or a full buffer */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = ulmin(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		m = m_free(m0);
		m0 = m;
	}

	if (m0 != NULL) {
		/* buffer too small: the rest of the packet is dropped */
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_oerrors++;

	return (error);
}
847 
848 /*
849  * the cdevsw write interface - an atomic write is a packet - or else!
850  */
851 int
852 tunwrite(dev_t dev, struct uio *uio, int ioflag)
853 {
854 	struct tun_softc *tp;
855 
856 	if ((tp = tun_lookup(minor(dev))) == NULL)
857 		return (ENXIO);
858 	return (tun_dev_write(tp, uio, ioflag));
859 }
860 
861 int
862 tapwrite(dev_t dev, struct uio *uio, int ioflag)
863 {
864 	struct tun_softc *tp;
865 
866 	if ((tp = tap_lookup(minor(dev))) == NULL)
867 		return (ENXIO);
868 	return (tun_dev_write(tp, uio, ioflag));
869 }
870 
/*
 * tun_dev_write - common cdev write path for tun and tap.
 * One write is one packet.  The user data is copied into an mbuf
 * chain; layer 2 frames are injected with if_input(), layer 3 packets
 * have their 32-bit AF header stripped and are dispatched onto the
 * matching protocol input queue (IPv4/IPv6 only).
 */
int
tun_dev_write(struct tun_softc *tp, struct uio *uio, int ioflag)
{
	struct ifnet		*ifp;
	struct niqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			error = 0, tlen;
	size_t			mlen;
#if NBPFILTER > 0
	int			 s;
#endif
	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/* reject empty writes and packets larger than MTU + header */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		/* NOTE(review): uio_resid is a size_t; %d here looks like a
		 * stale format specifier — debug-only, confirm before use */
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* copy user data in, growing the chain one mbuf at a time */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = ulmin(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;

	if (tp->tun_flags & TUN_LAYER2) {
		/* a tap frame enters the stack like any received frame */
		struct mbuf_list ml = MBUF_LIST_INITIALIZER();

		ml_enqueue(&ml, top);
		if_input(ifp, &ml);
		return (0);
	}

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		s = splnet();
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
		splx(s);
	}
#endif

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);
	top->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	top->m_pkthdr.ph_ifidx = ifp->if_index;

	/* dispatch on the address family the writer supplied */
	switch (ntohl(*th)) {
	case AF_INET:
		ifq = &ipintrq;
		break;
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	if (niq_enqueue(ifq, top) != 0) {
		/* queue full */
		ifp->if_collisions++;
		return (ENOBUFS);
	}

	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;

	return (error);
}
996 
997 /*
998  * tunpoll - the poll interface, this is only useful on reads
999  * really. The write detect always returns true, write never blocks
1000  * anyway, it either accepts the packet or drops it.
1001  */
1002 int
1003 tunpoll(dev_t dev, int events, struct proc *p)
1004 {
1005 	struct tun_softc *tp;
1006 
1007 	if ((tp = tun_lookup(minor(dev))) == NULL)
1008 		return (POLLERR);
1009 	return (tun_dev_poll(tp, events, p));
1010 }
1011 
1012 int
1013 tappoll(dev_t dev, int events, struct proc *p)
1014 {
1015 	struct tun_softc *tp;
1016 
1017 	if ((tp = tap_lookup(minor(dev))) == NULL)
1018 		return (POLLERR);
1019 	return (tun_dev_poll(tp, events, p));
1020 }
1021 
/*
 * tun_dev_poll - common poll path for tun and tap.
 * Readable when if_snd is non-empty; otherwise the caller is recorded
 * for selwakeup().  Writes never block, so writable is always true.
 */
int
tun_dev_poll(struct tun_softc *tp, int events, struct proc *p)
{
	int			 revents, s;
	struct ifnet		*ifp;
	unsigned int		 len;

	ifp = &tp->tun_if;
	revents = 0;
	s = splnet();
	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));

	if (events & (POLLIN | POLLRDNORM)) {
		len = IFQ_LEN(&ifp->if_snd);
		if (len > 0) {
			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname, len));
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
			selrecord(p, &tp->tun_rsel);
		}
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);
	splx(s);
	return (revents);
}
1049 
1050 /*
1051  * kqueue(2) support.
1052  *
1053  * The tun driver uses an array of tun_softc's based on the minor number
1054  * of the device.  kn->kn_hook gets set to the specific tun_softc.
1055  *
1056  * filt_tunread() sets kn->kn_data to the iface qsize
1057  * filt_tunwrite() sets kn->kn_data to the MTU size
1058  */
1059 int
1060 tunkqfilter(dev_t dev, struct knote *kn)
1061 {
1062 	struct tun_softc *tp;
1063 
1064 	if ((tp = tun_lookup(minor(dev))) == NULL)
1065 		return (ENXIO);
1066 	return (tun_dev_kqfilter(tp, kn));
1067 }
1068 
1069 int
1070 tapkqfilter(dev_t dev, struct knote *kn)
1071 {
1072 	struct tun_softc *tp;
1073 
1074 	if ((tp = tap_lookup(minor(dev))) == NULL)
1075 		return (ENXIO);
1076 	return (tun_dev_kqfilter(tp, kn));
1077 }
1078 
/*
 * tun_dev_kqfilter - attach a knote to the read or write klist.
 * kn_hook is set to the softc so the filter routines can find it.
 * Returns EINVAL for unsupported filter types.
 */
int
tun_dev_kqfilter(struct tun_softc *tp, struct knote *kn)
{
	int			 s;
	struct klist		*klist;
	struct ifnet		*ifp;

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));

	switch (kn->kn_filter) {
		case EVFILT_READ:
			klist = &tp->tun_rsel.si_note;
			kn->kn_fop = &tunread_filtops;
			break;
		case EVFILT_WRITE:
			klist = &tp->tun_wsel.si_note;
			kn->kn_fop = &tunwrite_filtops;
			break;
		default:
			return (EINVAL);
	}

	kn->kn_hook = (caddr_t)tp;

	/* splhigh: the klists are also touched from klist_invalidate() */
	s = splhigh();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);

	return (0);
}
1110 
/*
 * filt_tunrdetach - remove a read knote from the softc's read klist,
 * unless klist_invalidate() already detached it.
 */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1123 
/*
 * filt_tunread - EVFILT_READ event filter.
 * Fires when if_snd holds packets; kn_data is set to the queue length.
 * A detached knote (interface destroyed) always fires with kn_data 0
 * so the consumer notices EOF.
 */
int
filt_tunread(struct knote *kn, long hint)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	unsigned int		 len;

	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	s = splnet();
	len = IFQ_LEN(&ifp->if_snd);
	if (len > 0) {
		splx(s);
		kn->kn_data = len;

		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
		    IFQ_LEN(&ifp->if_snd)));
		return (1);
	}
	splx(s);
	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
	return (0);
}
1154 
/*
 * filt_tunwdetach - remove a write knote from the softc's write klist,
 * unless klist_invalidate() already detached it.
 */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1167 
1168 int
1169 filt_tunwrite(struct knote *kn, long hint)
1170 {
1171 	struct tun_softc	*tp;
1172 	struct ifnet		*ifp;
1173 
1174 	if (kn->kn_status & KN_DETACHED) {
1175 		kn->kn_data = 0;
1176 		return (1);
1177 	}
1178 
1179 	tp = (struct tun_softc *)kn->kn_hook;
1180 	ifp = &tp->tun_if;
1181 
1182 	kn->kn_data = ifp->if_mtu;
1183 
1184 	return (1);
1185 }
1186 
1187 void
1188 tun_start(struct ifnet *ifp)
1189 {
1190 	struct tun_softc	*tp = ifp->if_softc;
1191 
1192 	splassert(IPL_NET);
1193 
1194 	if (IFQ_LEN(&ifp->if_snd))
1195 		tun_wakeup(tp);
1196 }
1197 
1198 void
1199 tun_link_state(struct tun_softc *tp)
1200 {
1201 	struct ifnet *ifp = &tp->tun_if;
1202 	int link_state = LINK_STATE_DOWN;
1203 
1204 	if (tp->tun_flags & TUN_OPEN) {
1205 		if (tp->tun_flags & TUN_LAYER2)
1206 			link_state = LINK_STATE_FULL_DUPLEX;
1207 		else
1208 			link_state = LINK_STATE_UP;
1209 	}
1210 	if (ifp->if_link_state != link_state) {
1211 		ifp->if_link_state = link_state;
1212 		if_link_state_change(ifp);
1213 	}
1214 }
1215