xref: /openbsd-src/sys/net/if_tun.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*	$OpenBSD: if_tun.c,v 1.99 2009/08/09 10:09:12 claudio Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <net/if.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 
67 #ifdef INET
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef NETATALK
76 #include <netatalk/at.h>
77 #include <netatalk/at_var.h>
78 #endif
79 
80 #include "bpfilter.h"
81 #if NBPFILTER > 0
82 #include <net/bpf.h>
83 #endif
84 
85 /* for arc4random() */
86 #include <dev/rndvar.h>
87 
88 #include <net/if_tun.h>
89 
/*
 * Per-instance state for a tun(4) interface.  One is allocated per
 * unit in tun_create() and linked into tun_softc_list; the unit number
 * matches the character device minor (see tun_lookup()/tunopen()).
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	LIST_ENTRY(tun_softc) tun_list;	/* all tunnel interfaces */
	struct ifmedia	tun_media;	/* fake media for SIOC[GS]IFMEDIA */
	int		tun_unit;	/* unit number == device minor */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	pid_t		tun_pgid;	/* the process group - if any */
	u_short		tun_flags;	/* misc flags */
#define tun_if	arpcom.ac_if
};
103 
104 #ifdef	TUN_DEBUG
105 int	tundebug = TUN_DEBUG;
106 #define TUNDEBUG(a)	(tundebug? printf a : 0)
107 #else
108 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
109 #endif
110 
111 /* Only these IFF flags are changeable by TUNSIFINFO */
112 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
113 
114 void	tunattach(int);
115 int	tunopen(dev_t, int, int, struct proc *);
116 int	tunclose(dev_t, int, int, struct proc *);
117 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
118 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
119 	    struct rtentry *);
120 int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
121 int	tunread(dev_t, struct uio *, int);
122 int	tunwrite(dev_t, struct uio *, int);
123 int	tunpoll(dev_t, int, struct proc *);
124 int	tunkqfilter(dev_t, struct knote *);
125 int	tun_clone_create(struct if_clone *, int);
126 int	tun_create(struct if_clone *, int, int);
127 int	tun_clone_destroy(struct ifnet *);
128 struct	tun_softc *tun_lookup(int);
129 void	tun_wakeup(struct tun_softc *);
130 int	tun_switch(struct tun_softc *, int);
131 
132 int	tuninit(struct tun_softc *);
133 int	filt_tunread(struct knote *, long);
134 int	filt_tunwrite(struct knote *, long);
135 void	filt_tunrdetach(struct knote *);
136 void	filt_tunwdetach(struct knote *);
137 void	tunstart(struct ifnet *);
138 void	tun_link_state(struct tun_softc *);
139 int	tun_media_change(struct ifnet *);
140 void	tun_media_status(struct ifnet *, struct ifmediareq *);
141 
142 struct filterops tunread_filtops =
143 	{ 1, NULL, filt_tunrdetach, filt_tunread};
144 
145 struct filterops tunwrite_filtops =
146 	{ 1, NULL, filt_tunwdetach, filt_tunwrite};
147 
148 LIST_HEAD(, tun_softc) tun_softc_list;
149 
150 struct if_clone tun_cloner =
151     IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
152 
/*
 * Driver attach routine, called once at boot with the number of
 * preconfigured devices (unused).  Initializes the global softc list
 * before registering the "tun" cloner so clone requests find a valid
 * list.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	if_clone_attach(&tun_cloner);
}
159 
/*
 * if_clone create handler: instantiate a new tun interface in the
 * default layer 3 mode (no TUN_LAYER2 flag).  Returns 0 on success
 * or ENOMEM from tun_create().
 */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	int	mode_flags = 0;		/* layer 3 by default */

	return (tun_create(ifc, unit, mode_flags));
}
165 
/*
 * Common creation path for both layer 3 ("tun") and layer 2
 * (IFF_LINK0/ethernet) mode.  Allocates and initializes the softc,
 * fabricates a MAC address, attaches the network interface and links
 * the softc into the global list.  Returns 0 on success or ENOMEM if
 * the softc cannot be allocated.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	u_int32_t		 macaddr_rnd;
	int			 s;

	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!tp)
		return (ENOMEM);

	tp->tun_unit = unit;
	/* TUN_STAYUP is cleared again in tunopen() for on-demand devices */
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	tp->arpcom.ac_enaddr[0] = 0x00;
	tp->arpcom.ac_enaddr[1] = 0xbd;
	/*
	 * This no longer happens pre-scheduler so let's use the real
	 * random subsystem instead of random().
	 */
	macaddr_rnd = arc4random();
	bcopy(&macaddr_rnd, &tp->arpcom.ac_enaddr[2], sizeof(u_int32_t));
	tp->arpcom.ac_enaddr[5] = (u_char)unit + 1;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_softc = tp;
	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tunstart;
	ifp->if_hardmtu = TUNMRU;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	IFQ_SET_READY(&ifp->if_snd);

	/* fake media so SIOC[GS]IFMEDIA behaves sensibly on this device */
	ifmedia_init(&tp->tun_media, 0, tun_media_change, tun_media_status);
	ifmedia_add(&tp->tun_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&tp->tun_media, IFM_ETHER | IFM_AUTO);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel with a 32-bit AF header */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = IFF_POINTOPOINT;
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);

		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: attach as an ethernet-like interface */
		tp->tun_flags |= TUN_LAYER2;
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST|IFF_LINK0);
		ifp->if_capabilities = IFCAP_VLAN_MTU;

		if_attach(ifp);
		ether_ifattach(ifp);
	}
	/* force output function to our function */
	ifp->if_output = tun_output;

	s = splnet();
	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
	splx(s);

	return (0);
}
237 
/*
 * if_clone destroy handler: wake any sleepers, invalidate outstanding
 * knotes, unlink the softc from the global list and detach the
 * interface before freeing the softc.  Always returns 0.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

	/* kick any reader blocked on the device before it disappears */
	tun_wakeup(tp);

	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	s = splnet();
	LIST_REMOVE(tp, tun_list);
	splx(s);

	/* layer 2 instances were ether_ifattach()ed in tun_create() */
	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF);
	return (0);
}
263 
264 struct tun_softc *
265 tun_lookup(int unit)
266 {
267 	struct tun_softc *tp;
268 
269 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
270 		if (tp->tun_unit == unit)
271 			return (tp);
272 	return (NULL);
273 }
274 
/*
 * Switch an existing interface between layer 2 and layer 3 mode (the
 * mode is selected via IFF_LINK0 in tun_ioctl()).  Implemented by
 * destroying the current interface and creating a fresh one with the
 * same unit number; interface group membership and the open/NBIO/ASYNC
 * state are carried over to the new instance.
 */
int
tun_switch(struct tun_softc *tp, int flags)
{
	struct ifnet		*ifp = &tp->tun_if;
	int			 unit, open, r, s;
	struct ifg_list		*ifgl;
	u_int			ifgr_len;
	char			*ifgrpnames, *p;

	/* nothing to do if already in the requested mode */
	if ((tp->tun_flags & TUN_LAYER2) == (flags & TUN_LAYER2))
		return (0);

	/* tp will be removed so store unit number */
	unit = tp->tun_unit;
	open = tp->tun_flags & (TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	TUNDEBUG(("%s: switching to layer %d\n", ifp->if_xname,
		    flags & TUN_LAYER2 ? 2 : 3));

	/* remember joined groups */
	ifgr_len = 0;
	ifgrpnames = NULL;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		ifgr_len += IFNAMSIZ;
	if (ifgr_len)
		/* +1 guarantees a trailing NUL terminating the array */
		ifgrpnames = malloc(ifgr_len + 1, M_TEMP, M_NOWAIT|M_ZERO);
	if (ifgrpnames) {
		/* flatten group names into IFNAMSIZ-sized slots */
		p = ifgrpnames;
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
			strlcpy(p, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
			p += IFNAMSIZ;
		}
	}

	/* remove old device and ... */
	tun_clone_destroy(ifp);
	/* attach new interface */
	r = tun_create(&tun_cloner, unit, flags);

	if (r == 0) {
		/* tp was freed by tun_clone_destroy(); look up the new one */
		if ((tp = tun_lookup(unit)) == NULL) {
			/* this should never fail */
			r = ENXIO;
			goto abort;
		}

		/* rejoin groups */
		ifp = &tp->tun_if;
		for (p = ifgrpnames; p && *p; p += IFNAMSIZ)
			if_addgroup(ifp, p);
	}
	if (open && r == 0) {
		/* already opened before ifconfig tunX link0 */
		s = splnet();
		tp->tun_flags |= open;
		tun_link_state(tp);
		splx(s);
		TUNDEBUG(("%s: already open\n", tp->tun_if.if_xname));
	}
 abort:
	if (ifgrpnames)
		free(ifgrpnames, M_TEMP);
	return (r);
}
338 
339 /*
340  * tunnel open - must be superuser & the device must be
341  * configured in
342  */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 error, s;

	/* opening a tun device requires root */
	if ((error = suser(p, 0)) != 0)
		return (error);

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname)) != 0)
			return (error);

		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		/* on-demand devices are destroyed again in tunclose() */
		tp->tun_flags &= ~TUN_STAYUP;
	}

	/* only one opener at a time */
	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;

	/* automatically UP the interface on open */
	s = splnet();
	ifp->if_flags |= IFF_RUNNING;
	tun_link_state(tp);
	if_up(ifp);
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}
381 
382 /*
383  * tunclose - close the device; if closing the real device, flush pending
384  *  output and unless STAYUP bring down and destroy the interface.
385  */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	/* clear open state and per-open modes (non-blocking, async) */
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);

	/*
	 * junk all pending output
	 */
	s = splnet();
	ifp->if_flags &= ~IFF_RUNNING;
	tun_link_state(tp);
	IFQ_PURGE(&ifp->if_snd);
	splx(s);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	/* devices created on demand in tunopen() go away on last close */
	if (!(tp->tun_flags & TUN_STAYUP))
		return (if_clone_destroy(ifp->if_xname));
	else {
		/* forget the SIGIO recipient and wake poll/kqueue waiters */
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
		KNOTE(&tp->tun_rsel.si_note, 0);
	}

	return (0);
}
420 
/*
 * Recompute the address-derived flag bits (TUN_IASET, TUN_DSTADDR,
 * TUN_BRDADDR) from the interface's current address list and mark the
 * interface up and running.  Called from tun_ioctl() whenever an
 * address is configured.  Always returns 0.
 */
int
tuninit(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tuninit\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE; /* we are never active */

	/* start from a clean slate and re-derive the flags below */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin;

			sin = (struct sockaddr_in6 *)ifa->ifa_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
				if (sin &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
478 
479 /*
480  * Process an ioctl request.
481  */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		tuninit(tp);
		TUNDEBUG(("%s: address set\n", ifp->if_xname));
		/* in layer 2 mode behave like an ethernet interface */
		if (tp->tun_flags & TUN_LAYER2)
			switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
			case AF_INET:
				arp_ifinit(&tp->arpcom, (struct ifaddr *)data);
				break;
#endif
			default:
				break;
			}
		break;
	case SIOCSIFDSTADDR:
		tuninit(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFBRDADDR:
		tuninit(tp);
		TUNDEBUG(("%s: broadcast address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		if (ifr == 0) {
			error = EAFNOSUPPORT;	   /* XXX */
			break;
		}

		if (tp->tun_flags & TUN_LAYER2) {
			error = (cmd == SIOCADDMULTI) ?
			    ether_addmulti(ifr, &tp->arpcom) :
			    ether_delmulti(ifr, &tp->arpcom);
			if (error == ENETRESET) {
				/*
				 * Multicast list has changed; set the hardware
				 * filter accordingly. The good thing is we do
				 * not have a hardware filter (:
				 */
				error = 0;
			}
			break;
		}

		/* layer 3: accept only address families we can handle */
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:
			break;
#endif
#ifdef INET6
		case AF_INET6:
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* IFF_LINK0 selects layer 2 mode; may recreate the device */
		error = tun_switch(tp,
		    ifp->if_flags & IFF_LINK0 ? TUN_LAYER2 : 0);
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &tp->tun_media, cmd);
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	splx(s);
	return (error);
}
576 
577 /*
578  * tun_output - queue packets from higher level ready to put out.
579  */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, len, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	/* no point queueing when nobody has the device open */
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		/* call ether_output and that will call tunstart at the end */
		return (ether_output(ifp, m0, dst, rt));

	/* layer 3: prepend the destination address family as a 32-bit word */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	s = splnet();

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* record the length before the queue takes ownership of m0 */
	len = m0->m_pkthdr.len;
	IFQ_ENQUEUE(&ifp->if_snd, m0, NULL, error);
	if (error) {
		splx(s);
		ifp->if_collisions++;
		return (error);
	}
	splx(s);
	ifp->if_opackets++;
	ifp->if_obytes += len;

	/* notify the reader that a packet is waiting */
	tun_wakeup(tp);
	return (0);
}
633 
634 void
635 tun_wakeup(struct tun_softc *tp)
636 {
637 	if (tp->tun_flags & TUN_RWAIT) {
638 		tp->tun_flags &= ~TUN_RWAIT;
639 		wakeup((caddr_t)tp);
640 	}
641 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
642 		csignal(tp->tun_pgid, SIGIO,
643 		    tp->tun_siguid, tp->tun_sigeuid);
644 	selwakeup(&tp->tun_rsel);
645 	KNOTE(&tp->tun_rsel.si_note, 0);
646 }
647 
648 /*
649  * the cdevsw interface is now pretty minimal.
650  */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		/* set mtu, type, (restricted) flags and baudrate at once */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only TUN_IFF_FLAGS bits may be changed by userland */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast mode */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		/* toggle non-blocking reads */
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		/* toggle SIGIO delivery on packet arrival */
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* report the length of the next queued packet, if any */
		IFQ_POLL(&tp->tun_if.if_snd, m);
		if (m != NULL)
			*(int *)data = m->m_pkthdr.len;
		else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember whom to signal and with which credentials */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_cred->p_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case OSIOCGIFADDR:
	case SIOCGIFADDR:
		/* the MAC address is only meaningful in layer 2 mode */
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
		splx(s);
		return (ENOTTY);
	}
	splx(s);
	return (0);
}
757 
758 /*
759  * The cdevsw read interface - reads a packet at a time, or at
760  * least as much of a packet as can be read.
761  */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0, len, s;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: read\n", ifp->if_xname));
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, tp->tun_flags));
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	do {
		/* wait until the interface is ready again */
		while ((tp->tun_flags & TUN_READY) != TUN_READY)
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			/* queue empty: fail or sleep depending on mode */
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* TUN_RWAIT tells tun_wakeup() to wake us */
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m0 == NULL);
	splx(s);

	/* copy out as much of the packet as the caller's buffer holds */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	if (m0 != NULL) {
		/* caller's buffer was too small; rest of packet is lost */
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;

	return (error);
}
823 
824 /*
825  * the cdevsw write interface - an atomic write is a packet - or else!
826  */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			 isr;
	int			 error=0, s, tlen, mlen;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/*
	 * One write is one packet: reject empty writes and anything
	 * larger than the MTU plus the mode-specific header.
	 */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* copy the user data into an mbuf chain rooted at top */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		/* the chain (if any) is already fully linked; free it all */
		if (top != NULL)
			m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		s = splnet();
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
		splx(s);
	}
#endif

	if (tp->tun_flags & TUN_LAYER2) {
		/* quirk to not add randomness from a virtual device */
		atomic_setbits_int(&netisr, (1 << NETISR_RND_DONE));

		s = splnet();
		ether_input_mbuf(ifp, top);
		splx(s);

		ifp->if_ipackets++; /* ibytes are counted in ether_input */

		return (0);
	}

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);
	top->m_pkthdr.rdomain = ifp->if_rdomain;

	/* dispatch on the address family from the tunnel header */
	switch (ntohl(*th)) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		isr = NETISR_IPV6;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		ifq = &atintrq2;
		isr = NETISR_ATALK;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		/* signal congestion so upper layers can back off */
		if (!ifq->ifq_congestion)
			if_congestion(ifq);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	schednetisr(isr);
	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;
	splx(s);
	return (error);
}
977 
978 /*
979  * tunpoll - the poll interface, this is only useful on reads
980  * really. The write detect always returns true, write never blocks
981  * anyway, it either accepts the packet or drops it.
982  */
983 int
984 tunpoll(dev_t dev, int events, struct proc *p)
985 {
986 	int			 revents, s;
987 	struct tun_softc	*tp;
988 	struct ifnet		*ifp;
989 	struct mbuf		*m;
990 
991 	if ((tp = tun_lookup(minor(dev))) == NULL)
992 		return (POLLERR);
993 
994 	ifp = &tp->tun_if;
995 	revents = 0;
996 	s = splnet();
997 	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));
998 
999 	if (events & (POLLIN | POLLRDNORM)) {
1000 		IFQ_POLL(&ifp->if_snd, m);
1001 		if (m != NULL) {
1002 			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname,
1003 			    ifp->if_snd.ifq_len));
1004 			revents |= events & (POLLIN | POLLRDNORM);
1005 		} else {
1006 			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
1007 			selrecord(p, &tp->tun_rsel);
1008 		}
1009 	}
1010 	if (events & (POLLOUT | POLLWRNORM))
1011 		revents |= events & (POLLOUT | POLLWRNORM);
1012 	splx(s);
1013 	return (revents);
1014 }
1015 
1016 /*
1017  * kqueue(2) support.
1018  *
1019  * The tun driver uses an array of tun_softc's based on the minor number
1020  * of the device.  kn->kn_hook gets set to the specific tun_softc.
1021  *
1022  * filt_tunread() sets kn->kn_data to the iface qsize
1023  * filt_tunwrite() sets kn->kn_data to the MTU size
1024  */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	int			 s;
	struct klist		*klist;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;

	s = splnet();
	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
	splx(s);

	/* pick the klist and filter ops for the requested filter type */
	switch (kn->kn_filter) {
		case EVFILT_READ:
			klist = &tp->tun_rsel.si_note;
			kn->kn_fop = &tunread_filtops;
			break;
		case EVFILT_WRITE:
			klist = &tp->tun_wsel.si_note;
			kn->kn_fop = &tunwrite_filtops;
			break;
		default:
			return (EPERM);	/* 1 */
	}

	kn->kn_hook = (caddr_t)tp;

	s = splhigh();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);

	return (0);
}
1063 
1064 void
1065 filt_tunrdetach(struct knote *kn)
1066 {
1067 	int			 s;
1068 	struct tun_softc	*tp;
1069 
1070 	tp = (struct tun_softc *)kn->kn_hook;
1071 	s = splhigh();
1072 	if (!(kn->kn_status & KN_DETACHED))
1073 		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
1074 	splx(s);
1075 }
1076 
1077 int
1078 filt_tunread(struct knote *kn, long hint)
1079 {
1080 	int			 s;
1081 	struct tun_softc	*tp;
1082 	struct ifnet		*ifp;
1083 	struct mbuf		*m;
1084 
1085 	if (kn->kn_status & KN_DETACHED) {
1086 		kn->kn_data = 0;
1087 		return (1);
1088 	}
1089 
1090 	tp = (struct tun_softc *)kn->kn_hook;
1091 	ifp = &tp->tun_if;
1092 
1093 	s = splnet();
1094 	IFQ_POLL(&ifp->if_snd, m);
1095 	if (m != NULL) {
1096 		splx(s);
1097 		kn->kn_data = ifp->if_snd.ifq_len;
1098 
1099 		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
1100 		    ifp->if_snd.ifq_len));
1101 		return (1);
1102 	}
1103 	splx(s);
1104 	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
1105 	return (0);
1106 }
1107 
1108 void
1109 filt_tunwdetach(struct knote *kn)
1110 {
1111 	int			 s;
1112 	struct tun_softc	*tp;
1113 
1114 	tp = (struct tun_softc *)kn->kn_hook;
1115 	s = splhigh();
1116 	if (!(kn->kn_status & KN_DETACHED))
1117 		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
1118 	splx(s);
1119 }
1120 
1121 int
1122 filt_tunwrite(struct knote *kn, long hint)
1123 {
1124 	struct tun_softc	*tp;
1125 	struct ifnet		*ifp;
1126 
1127 	if (kn->kn_status & KN_DETACHED) {
1128 		kn->kn_data = 0;
1129 		return (1);
1130 	}
1131 
1132 	tp = (struct tun_softc *)kn->kn_hook;
1133 	ifp = &tp->tun_if;
1134 
1135 	kn->kn_data = ifp->if_mtu;
1136 
1137 	return (1);
1138 }
1139 
1140 /*
1141  * Start packet transmission on the interface.
1142  * when the interface queue is rate-limited by ALTQ or TBR,
1143  * if_start is needed to drain packets from the queue in order
1144  * to notify readers when outgoing packets become ready.
1145  * In layer 2 mode this function is called from ether_output.
1146  */
void
tunstart(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	struct mbuf		*m;

	splassert(IPL_NET);

	/*
	 * In plain layer 3 mode tun_output() already wakes the reader,
	 * so there is nothing to do here unless ALTQ or TBR may delay
	 * packets past tun_output().
	 */
	if (!(tp->tun_flags & TUN_LAYER2) &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd) &&
	    !TBR_IS_ENABLED(&ifp->if_snd))
		return;

	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		if (tp->tun_flags & TUN_LAYER2) {
#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
			ifp->if_opackets++;
		}
		/* notify the reader that a packet is queued */
		tun_wakeup(tp);
	}
}
1172 
1173 void
1174 tun_link_state(struct tun_softc *tp)
1175 {
1176 	struct ifnet *ifp = &tp->tun_if;
1177 	int link_state = LINK_STATE_DOWN;
1178 
1179 	if (tp->tun_flags & TUN_OPEN) {
1180 		if (tp->tun_flags & TUN_LAYER2)
1181 			link_state = LINK_STATE_FULL_DUPLEX;
1182 		else
1183 			link_state = LINK_STATE_UP;
1184 	}
1185 	if (ifp->if_link_state != link_state) {
1186 		ifp->if_link_state = link_state;
1187 		if_link_state_change(ifp);
1188 	}
1189 }
1190 
/*
 * ifmedia change callback: media on a tun interface cannot really
 * change, so simply report success.
 */
int
tun_media_change(struct ifnet *ifp)
{
	return (0);
}
1197 
1198 void
1199 tun_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1200 {
1201 	struct tun_softc *tp = ifp->if_softc;
1202 
1203 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1204 	imr->ifm_status = IFM_AVALID;
1205 
1206 	tun_link_state(tp);
1207 
1208 	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
1209 	    ifp->if_flags & IFF_UP)
1210 		imr->ifm_status |= IFM_ACTIVE;
1211 }
1212