xref: /openbsd-src/sys/net/if_tun.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: if_tun.c,v 1.96 2009/02/20 12:47:57 jsing Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/route.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/in_var.h>
70 #include <netinet/ip.h>
71 #include <netinet/if_ether.h>
72 #endif
73 
74 #ifdef NETATALK
75 #include <netatalk/at.h>
76 #include <netatalk/at_var.h>
77 #endif
78 
79 #include "bpfilter.h"
80 #if NBPFILTER > 0
81 #include <net/bpf.h>
82 #endif
83 
84 /* for arc4random() */
85 #include <dev/rndvar.h>
86 
87 #include <net/if_tun.h>
88 
/*
 * Per-instance tun(4) state.  tun_flags holds both driver state bits
 * (TUN_OPEN, TUN_RWAIT, TUN_ASYNC, ...) and the TUN_LAYER2 mode bit;
 * the embedded arpcom lets a layer-2 instance act as an ethernet.
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	u_short		tun_flags;	/* misc flags */
	pid_t		tun_pgid;	/* the process group - if any */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	int		tun_unit;	/* minor number of this instance */
	LIST_ENTRY(tun_softc) tun_list;	/* all tunnel interfaces */
#define tun_if	arpcom.ac_if
};
101 
#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)

/* cdevsw entry points and internal helpers */
void	tunattach(int);
int	tunopen(dev_t, int, int, struct proc *);
int	tunclose(dev_t, int, int, struct proc *);
int	tun_ioctl(struct ifnet *, u_long, caddr_t);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
int	tunread(dev_t, struct uio *, int);
int	tunwrite(dev_t, struct uio *, int);
int	tunpoll(dev_t, int, struct proc *);
int	tunkqfilter(dev_t, struct knote *);
int	tun_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
struct	tun_softc *tun_lookup(int);
void	tun_wakeup(struct tun_softc *);
int	tun_switch(struct tun_softc *, int);

static int tuninit(struct tun_softc *);
static void tunstart(struct ifnet *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);

/* kqueue filter ops; first field 1 == filter is file-descriptor based */
struct filterops tunread_filtops =
	{ 1, NULL, filt_tunrdetach, filt_tunread};

struct filterops tunwrite_filtops =
	{ 1, NULL, filt_tunwdetach, filt_tunwrite};

/* list of all tun instances, guarded by splnet in create/destroy */
LIST_HEAD(, tun_softc) tun_softc_list;

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
147 
/*
 * tunattach - one-time driver attach.  Initializes the softc list and
 * registers the "tun" interface cloner; the argument n is unused since
 * instances are created on demand (by the cloner or by tunopen()).
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	if_clone_attach(&tun_cloner);
}
154 
/*
 * tun_clone_create - if_clone entry point: create the given unit in
 * the default layer-3 mode (flags 0, i.e. no TUN_LAYER2).
 */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
160 
/*
 * tun_create - allocate and attach a tun interface.  flags selects
 * layer-2 (TUN_LAYER2: ethernet-like, attached via ether_ifattach)
 * or the default layer-3 point-to-point mode.  Returns ENOMEM if the
 * softc cannot be allocated, 0 on success.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	u_int32_t		 macaddr_rnd;
	int			 s;

	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!tp)
		return (ENOMEM);

	/* STAYUP keeps the interface around across close(); tunopen()
	 * clears it for interfaces it auto-created. */
	tp->tun_unit = unit;
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	tp->arpcom.ac_enaddr[0] = 0x00;
	tp->arpcom.ac_enaddr[1] = 0xbd;
	/*
	 * This no longer happens pre-scheduler so let's use the real
	 * random subsystem instead of random().
	 */
	macaddr_rnd = arc4random();
	bcopy(&macaddr_rnd, &tp->arpcom.ac_enaddr[2], sizeof(u_int32_t));
	tp->arpcom.ac_enaddr[5] = (u_char)unit + 1;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_softc = tp;
	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tunstart;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	IFQ_SET_READY(&ifp->if_snd);
	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel with an AF_* header */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = IFF_POINTOPOINT;
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: behaves like an ethernet interface */
		tp->tun_flags |= TUN_LAYER2;
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST|IFF_LINK0);
		if_attach(ifp);
		ether_ifattach(ifp);
	}
	/* force output function to our function */
	/* (ether_ifattach() overwrote if_output with ether_output) */
	ifp->if_output = tun_output;

	s = splnet();
	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
	splx(s);

	return (0);
}
223 
/*
 * tun_clone_destroy - tear down a tun interface: wake any sleeping
 * readers, invalidate outstanding knotes, unlink the softc, detach the
 * interface and free the softc.  Always returns 0.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

	/* kick readers loose before the softc goes away */
	tun_wakeup(tp);

	/* knote lists are manipulated at splhigh (see filt_tun*detach) */
	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	s = splnet();
	LIST_REMOVE(tp, tun_list);
	splx(s);

	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF);
	return (0);
}
249 
250 struct tun_softc *
251 tun_lookup(int unit)
252 {
253 	struct tun_softc *tp;
254 
255 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
256 		if (tp->tun_unit == unit)
257 			return (tp);
258 	return (NULL);
259 }
260 
/*
 * tun_switch - flip an interface between layer-2 and layer-3 mode by
 * destroying it and re-creating it with the new flags.  The open-state
 * bits are carried over so a process that already has the device open
 * keeps working.  Returns 0 if no change was needed, otherwise the
 * result of tun_create() (or ENXIO if the recreated unit vanished).
 */
int
tun_switch(struct tun_softc *tp, int flags)
{
	struct ifnet	*ifp = &tp->tun_if;
	int		 unit, open, r;

	if ((tp->tun_flags & TUN_LAYER2) == (flags & TUN_LAYER2))
		return (0);

	/* tp will be removed so store unit number */
	unit = tp->tun_unit;
	open = tp->tun_flags & (TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	TUNDEBUG(("%s: switching to layer %d\n", ifp->if_xname,
		    flags & TUN_LAYER2 ? 2 : 3));

	/* remove old device and ... */
	/* NOTE: tun_clone_destroy() frees tp; do not touch it below
	 * until it is re-looked-up. */
	tun_clone_destroy(ifp);
	/* attach new interface */
	r = tun_create(&tun_cloner, unit, flags);

	if (open && r == 0) {
		/* already opened before ifconfig tunX link0 */
		if ((tp = tun_lookup(unit)) == NULL)
			/* this should never fail */
			return (ENXIO);
		tp->tun_flags |= open;
		TUNDEBUG(("%s: already open\n", tp->tun_if.if_xname));
	}
	return (r);
}
291 
/*
 * tunnel open - must be superuser & the device must be
 * configured in.  If the unit does not exist yet it is cloned on
 * demand (and marked !TUN_STAYUP so close destroys it again).  Only
 * one opener at a time is allowed (EBUSY otherwise); the interface is
 * brought UP/RUNNING automatically.
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 error, s;

	if ((error = suser(p, 0)) != 0)
		return (error);

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname)) != 0)
			return (error);

		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		/* auto-created: destroy the interface on last close */
		tp->tun_flags &= ~TUN_STAYUP;
	}

	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;

	/* automatically UP the interface on open */
	s = splnet();
	if_up(ifp);
	ifp->if_flags |= IFF_RUNNING;
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}
333 
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 *  For STAYUP interfaces the pgid is cleared and waiters are notified
 *  so they see the device go idle.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	ifp->if_flags &= ~IFF_RUNNING;

	/*
	 * junk all pending output
	 */
	s = splnet();
	IFQ_PURGE(&ifp->if_snd);
	splx(s);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	if (!(tp->tun_flags & TUN_STAYUP))
		return (if_clone_destroy(ifp->if_xname));
	else {
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
		KNOTE(&tp->tun_rsel.si_note, 0);
	}

	return (0);
}
371 
/*
 * tuninit - (re)compute the address-related flag bits after an address
 * change: marks the interface UP/RUNNING and scans the address list to
 * set TUN_IASET / TUN_DSTADDR / TUN_BRDADDR for INET (and TUN_IASET /
 * TUN_DSTADDR for INET6) addresses.  Always returns 0.
 */
static int
tuninit(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tuninit\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE; /* we are never active */

	/* recompute from scratch on every call */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin;

			sin = (struct sockaddr_in6 *)ifa->ifa_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
				if (sin &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
429 
/*
 * Process an ioctl request.
 *
 * tun_ioctl - network-side (ifnet) ioctl handler, called at splnet.
 * Handles address/MTU/multicast configuration; SIOCSIFFLAGS uses the
 * IFF_LINK0 bit to switch between layer-2 and layer-3 mode.  Unknown
 * requests are passed to ether_ioctl() in layer-2 mode, otherwise
 * rejected with ENOTTY.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		tuninit(tp);
		TUNDEBUG(("%s: address set\n", ifp->if_xname));
		if (tp->tun_flags & TUN_LAYER2)
			switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
			case AF_INET:
				arp_ifinit(&tp->arpcom, (struct ifaddr *)data);
				break;
#endif
			default:
				break;
			}
		break;
	case SIOCSIFDSTADDR:
		tuninit(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFBRDADDR:
		tuninit(tp);
		TUNDEBUG(("%s: broadcast address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		if (ifr == 0) {
			error = EAFNOSUPPORT;	   /* XXX */
			break;
		}

		if (tp->tun_flags & TUN_LAYER2) {
			error = (cmd == SIOCADDMULTI) ?
			    ether_addmulti(ifr, &tp->arpcom) :
			    ether_delmulti(ifr, &tp->arpcom);
			if (error == ENETRESET) {
				/*
				 * Multicast list has changed; set the hardware
				 * filter accordingly. The good thing is we do
				 * not have a hardware filter (:
				 */
				error = 0;
			}
			break;
		}

		/* layer 3: just validate the address family */
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:
			break;
#endif
#ifdef INET6
		case AF_INET6:
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* IFF_LINK0 selects layer-2 operation */
		error = tun_switch(tp,
		    ifp->if_flags & IFF_LINK0 ? TUN_LAYER2 : 0);
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	splx(s);
	return (error);
}
523 
/*
 * tun_output - queue packets from higher level ready to put out.
 * In layer-2 mode the packet goes through ether_output (which calls
 * tunstart at the end); otherwise a 4-byte AF_* header is prepended
 * and the packet is placed on if_snd for the reading process, which
 * is then woken up.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, len, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		/* call ether_output and that will call tunstart at the end */
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the address family for the reader to inspect */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	s = splnet();

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	len = m0->m_pkthdr.len;
	IFQ_ENQUEUE(&ifp->if_snd, m0, NULL, error);
	if (error) {
		splx(s);
		ifp->if_collisions++;
		return (error);
	}
	splx(s);
	ifp->if_opackets++;
	ifp->if_obytes += len;

	tun_wakeup(tp);
	return (0);
}
580 
/*
 * tun_wakeup - notify everyone waiting for packets on this device:
 * sleeping readers (tsleep in tunread), the async-I/O process group
 * (SIGIO), poll/select waiters and kqueue read knotes.
 */
void
tun_wakeup(struct tun_softc *tp)
{
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)tp);
	}
	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
		csignal(tp->tun_pgid, SIGIO,
		    tp->tun_siguid, tp->tun_sigeuid);
	selwakeup(&tp->tun_rsel);
	KNOTE(&tp->tun_rsel.si_note, 0);
}
594 
/*
 * the cdevsw interface is now pretty minimal.
 *
 * tunioctl - character-device ioctl handler (TUNSIFINFO, FIONBIO,
 * TIOCSPGRP, ...).  All cases run at splnet; MAC-address get/set
 * (SIOCGIFADDR/SIOCSIFADDR) is only valid in layer-2 mode.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only TUN_IFF_FLAGS bits may be changed this way */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of p2p/broadcast must be requested */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* report the size of the next packet, if any */
		IFQ_POLL(&tp->tun_if.if_snd, m);
		if (m != NULL)
			*(int *)data = m->m_pkthdr.len;
		else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember who to SIGIO, and with which credentials */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_cred->p_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case OSIOCGIFADDR:
	case SIOCGIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
		splx(s);
		return (ENOTTY);
	}
	splx(s);
	return (0);
}
704 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 * Blocks until a packet is queued unless TUN_NBIO/IO_NDELAY is set;
 * any part of the packet that does not fit in the caller's buffer is
 * dropped.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0, len, s;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: read\n", ifp->if_xname));
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, tp->tun_flags));
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	do {
		/* the interface may go !READY while we slept above */
		while ((tp->tun_flags & TUN_READY) != TUN_READY)
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* sleep until tun_wakeup() signals a packet */
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m0 == NULL);
	splx(s);

	/* copy out as much of the chain as fits, freeing as we go */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	if (m0 != NULL) {
		/* caller's buffer was too small; rest of packet is lost */
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;

	return (error);
}
770 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 * Copies the user buffer into an mbuf chain and injects it: in layer-2
 * mode through ether_input_mbuf(), otherwise onto the protocol input
 * queue selected by the leading 4-byte AF_* tunnel header.
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			 isr;
	int			 error=0, s, tlen, mlen;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/* one write is one packet: reject empty or over-MTU writes
	 * (allowing for the ethernet or AF_* tunnel header) */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* copy user data into the chain, extending it as needed */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		if (top != NULL)
			m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		s = splnet();
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
		splx(s);
	}
#endif

	if (tp->tun_flags & TUN_LAYER2) {
		/* quirk to not add randomness from a virtual device */
		atomic_setbits_int(&netisr, (1 << NETISR_RND_DONE));

		s = splnet();
		ether_input_mbuf(ifp, top);
		splx(s);

		ifp->if_ipackets++; /* ibytes are counted in ether_input */

		return (0);
	}

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);

	/* dispatch on the address family from the tunnel header */
	switch (ntohl(*th)) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		isr = NETISR_IPV6;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		ifq = &atintrq2;
		isr = NETISR_ATALK;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		/* tell the stack the input queue is congested */
		if (!ifq->ifq_congestion)
			if_congestion(ifq);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	schednetisr(isr);
	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;
	splx(s);
	return (error);
}
923 
/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	int			 revents, s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (POLLERR);

	ifp = &tp->tun_if;
	revents = 0;
	s = splnet();
	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));

	if (events & (POLLIN | POLLRDNORM)) {
		/* readable iff a packet is waiting on if_snd */
		IFQ_POLL(&ifp->if_snd, m);
		if (m != NULL) {
			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname,
			    ifp->if_snd.ifq_len));
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
			selrecord(p, &tp->tun_rsel);
		}
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);
	splx(s);
	return (revents);
}
961 
962 /*
963  * kqueue(2) support.
964  *
965  * The tun driver uses an array of tun_softc's based on the minor number
966  * of the device.  kn->kn_hook gets set to the specific tun_softc.
967  *
968  * filt_tunread() sets kn->kn_data to the iface qsize
969  * filt_tunwrite() sets kn->kn_data to the MTU size
970  */
971 int
972 tunkqfilter(dev_t dev, struct knote *kn)
973 {
974 	int			 s;
975 	struct klist		*klist;
976 	struct tun_softc	*tp;
977 	struct ifnet		*ifp;
978 
979 	if ((tp = tun_lookup(minor(dev))) == NULL)
980 		return (ENXIO);
981 
982 	ifp = &tp->tun_if;
983 
984 	s = splnet();
985 	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
986 	splx(s);
987 
988 	switch (kn->kn_filter) {
989 		case EVFILT_READ:
990 			klist = &tp->tun_rsel.si_note;
991 			kn->kn_fop = &tunread_filtops;
992 			break;
993 		case EVFILT_WRITE:
994 			klist = &tp->tun_wsel.si_note;
995 			kn->kn_fop = &tunwrite_filtops;
996 			break;
997 		default:
998 			return (EPERM);	/* 1 */
999 	}
1000 
1001 	kn->kn_hook = (caddr_t)tp;
1002 
1003 	s = splhigh();
1004 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1005 	splx(s);
1006 
1007 	return (0);
1008 }
1009 
/*
 * filt_tunrdetach - remove a knote from the read klist unless the
 * knote was already detached by klist_invalidate() at destroy time.
 */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1022 
1023 int
1024 filt_tunread(struct knote *kn, long hint)
1025 {
1026 	int			 s;
1027 	struct tun_softc	*tp;
1028 	struct ifnet		*ifp;
1029 	struct mbuf		*m;
1030 
1031 	if (kn->kn_status & KN_DETACHED) {
1032 		kn->kn_data = 0;
1033 		return (1);
1034 	}
1035 
1036 	tp = (struct tun_softc *)kn->kn_hook;
1037 	ifp = &tp->tun_if;
1038 
1039 	s = splnet();
1040 	IFQ_POLL(&ifp->if_snd, m);
1041 	if (m != NULL) {
1042 		splx(s);
1043 		kn->kn_data = ifp->if_snd.ifq_len;
1044 
1045 		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
1046 		    ifp->if_snd.ifq_len));
1047 		return (1);
1048 	}
1049 	splx(s);
1050 	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
1051 	return (0);
1052 }
1053 
/*
 * filt_tunwdetach - remove a knote from the write klist unless the
 * knote was already detached by klist_invalidate() at destroy time.
 */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1066 
1067 int
1068 filt_tunwrite(struct knote *kn, long hint)
1069 {
1070 	struct tun_softc	*tp;
1071 	struct ifnet		*ifp;
1072 
1073 	if (kn->kn_status & KN_DETACHED) {
1074 		kn->kn_data = 0;
1075 		return (1);
1076 	}
1077 
1078 	tp = (struct tun_softc *)kn->kn_hook;
1079 	ifp = &tp->tun_if;
1080 
1081 	kn->kn_data = ifp->if_mtu;
1082 
1083 	return (1);
1084 }
1085 
/*
 * Start packet transmission on the interface.
 * when the interface queue is rate-limited by ALTQ or TBR,
 * if_start is needed to drain packets from the queue in order
 * to notify readers when outgoing packets become ready.
 * In layer 2 mode this function is called from ether_output.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	struct mbuf		*m;

	splassert(IPL_NET);

	/* layer 3 only needs this path when ALTQ/TBR shapes the queue;
	 * tun_output() already wakes readers directly otherwise */
	if (!(tp->tun_flags & TUN_LAYER2) &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd) &&
	    !TBR_IS_ENABLED(&ifp->if_snd))
		return;

	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		if (tp->tun_flags & TUN_LAYER2) {
#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
			ifp->if_opackets++;
		}
		/* packet stays queued; the reader dequeues it */
		tun_wakeup(tp);
	}
}
1118