xref: /openbsd-src/sys/net/if_tun.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: if_tun.c,v 1.94 2008/08/04 18:55:08 damien Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/route.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/in_var.h>
70 #include <netinet/ip.h>
71 #include <netinet/if_ether.h>
72 #endif
73 
74 #ifdef NETATALK
75 #include <netatalk/at.h>
76 #include <netatalk/at_var.h>
77 #endif
78 
79 #include "bpfilter.h"
80 #if NBPFILTER > 0
81 #include <net/bpf.h>
82 #endif
83 
84 /* for arc4random() */
85 #include <dev/rndvar.h>
86 
87 #include <net/if_tun.h>
88 
/*
 * Per-interface state; one is allocated per tun unit in tun_create().
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	u_short		tun_flags;	/* misc flags */
	pid_t		tun_pgid;	/* the process group - if any */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	int		tun_unit;	/* unit (minor device) number */
	LIST_ENTRY(tun_softc) tun_list;	/* all tunnel interfaces */
#define tun_if	arpcom.ac_if
};
101 
#ifdef	TUN_DEBUG
/* runtime-adjustable via the TUNSDEBUG/TUNGDEBUG ioctls below */
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
111 
112 void	tunattach(int);
113 int	tunopen(dev_t, int, int, struct proc *);
114 int	tunclose(dev_t, int, int, struct proc *);
115 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
116 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
117 	    struct rtentry *);
118 int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
119 int	tunread(dev_t, struct uio *, int);
120 int	tunwrite(dev_t, struct uio *, int);
121 int	tunpoll(dev_t, int, struct proc *);
122 int	tunkqfilter(dev_t, struct knote *);
123 int	tun_clone_create(struct if_clone *, int);
124 int	tun_create(struct if_clone *, int, int);
125 int	tun_clone_destroy(struct ifnet *);
126 struct	tun_softc *tun_lookup(int);
127 void	tun_wakeup(struct tun_softc *);
128 int	tun_switch(struct tun_softc *, int);
129 
130 static int tuninit(struct tun_softc *);
131 static void tunstart(struct ifnet *);
132 int	filt_tunread(struct knote *, long);
133 int	filt_tunwrite(struct knote *, long);
134 void	filt_tunrdetach(struct knote *);
135 void	filt_tunwdetach(struct knote *);
136 
/* kqueue filter definitions; knote attach/detach happens in tunkqfilter() */
struct filterops tunread_filtops =
	{ 1, NULL, filt_tunrdetach, filt_tunread};

struct filterops tunwrite_filtops =
	{ 1, NULL, filt_tunwdetach, filt_tunwrite};

/* all tun interfaces; inserted/removed at splnet */
LIST_HEAD(, tun_softc) tun_softc_list;

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
147 
/*
 * Pseudo-device attach routine.  `n' is the device count from config
 * and is unused here; interfaces are cloned on demand instead.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	if_clone_attach(&tun_cloner);
}
154 
/* if_clone create hook: make a layer 3 (point-to-point) tun interface */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
160 
161 int
162 tun_create(struct if_clone *ifc, int unit, int flags)
163 {
164 	struct tun_softc	*tp;
165 	struct ifnet		*ifp;
166 	u_int32_t		 macaddr_rnd;
167 	int			 s;
168 
169 	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
170 	if (!tp)
171 		return (ENOMEM);
172 
173 	tp->tun_unit = unit;
174 	tp->tun_flags = TUN_INITED|TUN_STAYUP;
175 
176 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
177 	tp->arpcom.ac_enaddr[0] = 0x00;
178 	tp->arpcom.ac_enaddr[1] = 0xbd;
179 	/*
180 	 * This no longer happens pre-scheduler so let's use the real
181 	 * random subsystem instead of random().
182 	 */
183 	macaddr_rnd = arc4random();
184 	bcopy(&macaddr_rnd, &tp->arpcom.ac_enaddr[2], sizeof(u_int32_t));
185 	tp->arpcom.ac_enaddr[5] = (u_char)unit + 1;
186 
187 	ifp = &tp->tun_if;
188 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
189 	    unit);
190 	ifp->if_softc = tp;
191 	ifp->if_ioctl = tun_ioctl;
192 	ifp->if_output = tun_output;
193 	ifp->if_start = tunstart;
194 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
195 	IFQ_SET_READY(&ifp->if_snd);
196 	if ((flags & TUN_LAYER2) == 0) {
197 		tp->tun_flags &= ~TUN_LAYER2;
198 		ifp->if_mtu = ETHERMTU;
199 		ifp->if_flags = IFF_POINTOPOINT;
200 		ifp->if_type = IFT_TUNNEL;
201 		ifp->if_hdrlen = sizeof(u_int32_t);
202 		if_attach(ifp);
203 		if_alloc_sadl(ifp);
204 #if NBPFILTER > 0
205 		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
206 #endif
207 	} else {
208 		tp->tun_flags |= TUN_LAYER2;
209 		ifp->if_flags =
210 		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST|IFF_LINK0);
211 		if_attach(ifp);
212 		ether_ifattach(ifp);
213 	}
214 	/* force output function to our function */
215 	ifp->if_output = tun_output;
216 
217 	s = splnet();
218 	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
219 	splx(s);
220 
221 	return (0);
222 }
223 
/*
 * if_clone destroy hook: tear down a tun interface.  Wakes up any
 * sleeping reader first, invalidates outstanding knotes, unlinks the
 * softc from the global list, then detaches and frees.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

	/* unblock tunread()/poll/kevent sleepers before tearing down */
	tun_wakeup(tp);

	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	s = splnet();
	LIST_REMOVE(tp, tun_list);
	splx(s);

	/* layer 2 interfaces need the additional ethernet detach */
	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF);
	return (0);
}
249 
250 struct tun_softc *
251 tun_lookup(int unit)
252 {
253 	struct tun_softc *tp;
254 
255 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
256 		if (tp->tun_unit == unit)
257 			return (tp);
258 	return (NULL);
259 }
260 
/*
 * Switch an interface between layer 2 and layer 3 operation by
 * destroying it and re-creating it with the same unit number.  The
 * open/non-blocking/async state is carried over so an already-open
 * device keeps working across the switch.
 */
int
tun_switch(struct tun_softc *tp, int flags)
{
	struct ifnet	*ifp = &tp->tun_if;
	int		 unit, open, r;

	/* nothing to do if we already operate in the requested mode */
	if ((tp->tun_flags & TUN_LAYER2) == (flags & TUN_LAYER2))
		return (0);

	/* tp will be removed so store unit number */
	unit = tp->tun_unit;
	open = tp->tun_flags & (TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	TUNDEBUG(("%s: switching to layer %d\n", ifp->if_xname,
		    flags & TUN_LAYER2 ? 2 : 3));

	/* remove old device and ... */
	tun_clone_destroy(ifp);
	/* attach new interface */
	r = tun_create(&tun_cloner, unit, flags);

	if (open && r == 0) {
		/* already opened before ifconfig tunX link0 */
		if ((tp = tun_lookup(unit)) == NULL)
			/* this should never fail */
			return (ENXIO);
		tp->tun_flags |= open;
		TUNDEBUG(("%s: already open\n", tp->tun_if.if_xname));
	}
	return (r);
}
291 
/*
 * tunnel open - must be superuser & the device must be
 * configured in
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 error, s;

	if ((error = suser(p, 0)) != 0)
		return (error);

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname)) != 0)
			return (error);

		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		/*
		 * Interfaces created by open go away on last close;
		 * tun_create() set TUN_STAYUP, so clear it here.
		 */
		tp->tun_flags &= ~TUN_STAYUP;
	}

	/* only one opener at a time */
	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;

	/* automatically UP the interface on open */
	s = splnet();
	if_up(ifp);
	ifp->if_flags |= IFF_RUNNING;
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}
333 
334 /*
335  * tunclose - close the device; if closing the real device, flush pending
336  *  output and unless STAYUP bring down and destroy the interface.
337  */
338 int
339 tunclose(dev_t dev, int flag, int mode, struct proc *p)
340 {
341 	int			 s;
342 	struct tun_softc	*tp;
343 	struct ifnet		*ifp;
344 
345 	if ((tp = tun_lookup(minor(dev))) == NULL)
346 		return (ENXIO);
347 
348 	ifp = &tp->tun_if;
349 	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);
350 	ifp->if_flags &= ~IFF_RUNNING;
351 
352 	/*
353 	 * junk all pending output
354 	 */
355 	s = splnet();
356 	IFQ_PURGE(&ifp->if_snd);
357 	splx(s);
358 
359 	TUNDEBUG(("%s: closed\n", ifp->if_xname));
360 
361 	if (!(tp->tun_flags & TUN_STAYUP))
362 		return (if_clone_destroy(ifp->if_xname));
363 	else {
364 		tp->tun_pgid = 0;
365 		selwakeup(&tp->tun_rsel);
366 		KNOTE(&tp->tun_rsel.si_note, 0);
367 	}
368 
369 	return (0);
370 }
371 
/*
 * Mark the interface up and running, then recompute the address state
 * flags (TUN_IASET/TUN_DSTADDR/TUN_BRDADDR) from the current address
 * list.  Called from tun_ioctl() whenever an address changes.
 */
static int
tuninit(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tuninit\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE; /* we are never active */

	/* start from scratch; the loop below re-derives the flags */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin;

			sin = (struct sockaddr_in6 *)ifa->ifa_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
				if (sin &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
429 
/*
 * Process an ioctl request on the network interface (not the cdev;
 * see tunioctl() for that).  Runs entirely at splnet.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();
	/* in layer 2 mode give ether_ioctl() the first shot */
	if (tp->tun_flags & TUN_LAYER2)
		if ((error = ether_ioctl(ifp, &tp->arpcom, cmd, data)) > 0) {
			splx(s);
			return (error);
		}
	switch (cmd) {
	case SIOCSIFADDR:
		tuninit(tp);
		TUNDEBUG(("%s: address set\n", ifp->if_xname));
		if (tp->tun_flags & TUN_LAYER2)
			switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
			case AF_INET:
				arp_ifinit(&tp->arpcom, (struct ifaddr *)data);
				break;
#endif
			default:
				break;
			}
		break;
	case SIOCSIFDSTADDR:
		tuninit(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFBRDADDR:
		tuninit(tp);
		TUNDEBUG(("%s: broadcast address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		if (ifr == 0) {
			error = EAFNOSUPPORT;	   /* XXX */
			break;
		}

		if (tp->tun_flags & TUN_LAYER2) {
			error = (cmd == SIOCADDMULTI) ?
			    ether_addmulti(ifr, &tp->arpcom) :
			    ether_delmulti(ifr, &tp->arpcom);
			if (error == ENETRESET) {
				/*
				 * Multicast list has changed; set the hardware
				 * filter accordingly. The good thing is we do
				 * not have a hardware filter (:
				 */
				error = 0;
			}
			break;
		}

		/* layer 3: just validate the address family */
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:
			break;
#endif
#ifdef INET6
		case AF_INET6:
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* IFF_LINK0 selects layer 2 operation (see tun_switch()) */
		error = tun_switch(tp,
		    ifp->if_flags & IFF_LINK0 ? TUN_LAYER2 : 0);
		break;
	default:
		error = ENOTTY;
	}
	splx(s);
	return (error);
}
523 
/*
 * tun_output - queue packets from higher level ready to put out.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, len, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	/* nobody has the device open - drop the packet */
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		/* call ether_output and that will call tunstart at the end */
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the 4-byte address family header the reader expects */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* queue the packet for tunread() to pick up */
	len = m0->m_pkthdr.len;
	s = splnet();
	IFQ_ENQUEUE(&ifp->if_snd, m0, NULL, error);
	if (error) {
		splx(s);
		ifp->if_collisions++;
		return (error);
	}
	splx(s);
	ifp->if_opackets++;
	ifp->if_obytes += len;

	/* notify any sleeping reader / poller / SIGIO listener */
	tun_wakeup(tp);
	return (0);
}
579 
580 void
581 tun_wakeup(struct tun_softc *tp)
582 {
583 	if (tp->tun_flags & TUN_RWAIT) {
584 		tp->tun_flags &= ~TUN_RWAIT;
585 		wakeup((caddr_t)tp);
586 	}
587 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
588 		csignal(tp->tun_pgid, SIGIO,
589 		    tp->tun_siguid, tp->tun_sigeuid);
590 	selwakeup(&tp->tun_rsel);
591 	KNOTE(&tp->tun_rsel.si_note, 0);
592 }
593 
/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		/*
		 * Set mtu/type/flags/baudrate; only the TUN_IFF_FLAGS
		 * bits of the interface flags may be changed.
		 */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* switch between point-to-point and broadcast mode */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* report size of the next queued packet without dequeueing */
		IFQ_POLL(&tp->tun_if.if_snd, m);
		if (m != NULL)
			*(int *)data = m->m_pkthdr.len;
		else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember who gets SIGIO (delivered from tun_wakeup()) */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_cred->p_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case OSIOCGIFADDR:
	case SIOCGIFADDR:
		/* layer 2 only: read back the fake MAC address */
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		/* layer 2 only: set the MAC address */
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
		splx(s);
		return (ENOTTY);
	}
	splx(s);
	return (0);
}
703 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0, len, s;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: read\n", ifp->if_xname));
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, tp->tun_flags));
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	/*
	 * Wait for a packet.  In non-blocking mode (TUN_NBIO/IO_NDELAY)
	 * return EWOULDBLOCK instead of sleeping.  Both tsleeps use
	 * PCATCH so a signal interrupts the wait.
	 */
	do {
		while ((tp->tun_flags & TUN_READY) != TUN_READY)
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m0 == NULL);
	splx(s);

	/* copy the chain to userland, freeing each mbuf as it drains */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	/* anything that did not fit into the user's buffer is lost */
	if (m0 != NULL) {
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;

	return (error);
}
769 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			 isr;
	int			 error=0, s, tlen, mlen;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/*
	 * Bound the write size: MTU plus either an ethernet header
	 * (layer 2) or the 4-byte address family header (layer 3).
	 */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* copy the user data into an mbuf chain, clusters when large */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		if (top != NULL)
			m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
#endif

	if (tp->tun_flags & TUN_LAYER2) {
		/* quirk to not add randomness from a virtual device */
		atomic_setbits_int(&netisr, (1 << NETISR_RND_DONE));

		/* hand the frame straight to the ethernet input path */
		ether_input_mbuf(ifp, top);
		ifp->if_ipackets++; /* ibytes are counted in ether_input */
		return (0);
	}

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);

	/* dispatch on the address family the writer prepended */
	switch (ntohl(*th)) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		isr = NETISR_IPV6;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		ifq = &atintrq2;
		isr = NETISR_ATALK;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	/* enqueue on the protocol input queue and schedule the softint */
	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		/* record congestion on the input queue */
		if (!ifq->ifq_congestion)
			if_congestion(ifq);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	schednetisr(isr);
	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;
	splx(s);
	return (error);
}
915 
916 /*
917  * tunpoll - the poll interface, this is only useful on reads
918  * really. The write detect always returns true, write never blocks
919  * anyway, it either accepts the packet or drops it.
920  */
921 int
922 tunpoll(dev_t dev, int events, struct proc *p)
923 {
924 	int			 revents, s;
925 	struct tun_softc	*tp;
926 	struct ifnet		*ifp;
927 	struct mbuf		*m;
928 
929 	if ((tp = tun_lookup(minor(dev))) == NULL)
930 		return (POLLERR);
931 
932 	ifp = &tp->tun_if;
933 	revents = 0;
934 	s = splnet();
935 	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));
936 
937 	if (events & (POLLIN | POLLRDNORM)) {
938 		IFQ_POLL(&ifp->if_snd, m);
939 		if (m != NULL) {
940 			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname,
941 			    ifp->if_snd.ifq_len));
942 			revents |= events & (POLLIN | POLLRDNORM);
943 		} else {
944 			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
945 			selrecord(p, &tp->tun_rsel);
946 		}
947 	}
948 	if (events & (POLLOUT | POLLWRNORM))
949 		revents |= events & (POLLOUT | POLLWRNORM);
950 	splx(s);
951 	return (revents);
952 }
953 
/*
 * kqueue(2) support.
 *
 * The tun driver uses an array of tun_softc's based on the minor number
 * of the device.  kn->kn_hook gets set to the specific tun_softc.
 *
 * filt_tunread() sets kn->kn_data to the iface qsize
 * filt_tunwrite() sets kn->kn_data to the MTU size
 */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	int			 s;
	struct klist		*klist;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;

	s = splnet();
	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
	splx(s);

	switch (kn->kn_filter) {
		case EVFILT_READ:
			klist = &tp->tun_rsel.si_note;
			kn->kn_fop = &tunread_filtops;
			break;
		case EVFILT_WRITE:
			klist = &tp->tun_wsel.si_note;
			kn->kn_fop = &tunwrite_filtops;
			break;
		default:
			/* XXX EINVAL would be more conventional here */
			return (EPERM);	/* 1 */
	}

	kn->kn_hook = (caddr_t)tp;

	/* link into the klist with everything blocked (see the detach
	 * routines, which remove at splhigh as well) */
	s = splhigh();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);

	return (0);
}
1001 
/*
 * Detach an EVFILT_READ knote from the interface's read klist.
 */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	/* skip if already detached (presumably by klist_invalidate()
	 * during interface destruction - see tun_clone_destroy()) */
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1014 
/*
 * EVFILT_READ event filter: active when the interface send queue
 * holds at least one packet; kn_data is the queue length.
 */
int
filt_tunread(struct knote *kn, long hint)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m;

	/* interface went away; report ready with no data */
	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	s = splnet();
	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		splx(s);
		kn->kn_data = ifp->if_snd.ifq_len;

		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
		    ifp->if_snd.ifq_len));
		return (1);
	}
	splx(s);
	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
	return (0);
}
1045 
/*
 * Detach an EVFILT_WRITE knote from the interface's write klist.
 */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	/* skip if already detached (presumably by klist_invalidate()
	 * during interface destruction - see tun_clone_destroy()) */
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1058 
1059 int
1060 filt_tunwrite(struct knote *kn, long hint)
1061 {
1062 	struct tun_softc	*tp;
1063 	struct ifnet		*ifp;
1064 
1065 	if (kn->kn_status & KN_DETACHED) {
1066 		kn->kn_data = 0;
1067 		return (1);
1068 	}
1069 
1070 	tp = (struct tun_softc *)kn->kn_hook;
1071 	ifp = &tp->tun_if;
1072 
1073 	kn->kn_data = ifp->if_mtu;
1074 
1075 	return (1);
1076 }
1077 
/*
 * Start packet transmission on the interface.
 * when the interface queue is rate-limited by ALTQ or TBR,
 * if_start is needed to drain packets from the queue in order
 * to notify readers when outgoing packets become ready.
 * In layer 2 mode this function is called from ether_output.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	struct mbuf		*m;

	/* plain layer 3 queues: tun_output() already did the wakeup */
	if (!(tp->tun_flags & TUN_LAYER2) &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd) &&
	    !TBR_IS_ENABLED(&ifp->if_snd))
		return;

	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		if (tp->tun_flags & TUN_LAYER2) {
			/* layer 3 packets were already tapped and counted
			 * in tun_output() */
#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
			ifp->if_opackets++;
		}
		tun_wakeup(tp);
	}
}
1108