xref: /openbsd-src/sys/net/if_tun.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: if_tun.c,v 1.100 2009/11/09 17:53:39 nicm Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <net/if.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 
67 #ifdef INET
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef NETATALK
76 #include <netatalk/at.h>
77 #include <netatalk/at_var.h>
78 #endif
79 
80 #include "bpfilter.h"
81 #if NBPFILTER > 0
82 #include <net/bpf.h>
83 #endif
84 
85 /* for arc4random() */
86 #include <dev/rndvar.h>
87 
88 #include <net/if_tun.h>
89 
/*
 * Per-instance tun(4) state.  The embedded arpcom lets the same
 * structure back both layer 3 (point-to-point, default) and layer 2
 * (Ethernet, selected with IFF_LINK0) operation; tun_if below aliases
 * arpcom.ac_if so the softc can be reached via ifp->if_softc.
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	LIST_ENTRY(tun_softc) tun_list;	/* all tunnel interfaces */
	struct ifmedia	tun_media;	/* dummy media (always ether/auto) */
	int		tun_unit;	/* unit number == device minor */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	pid_t		tun_pgid;	/* the process group - if any */
	u_short		tun_flags;	/* misc flags */
#define tun_if	arpcom.ac_if
};
103 
/* Debug printf; compiles to nothing unless TUN_DEBUG is defined above. */
#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
113 
/* cdevsw/ifnet entry points and internal helpers */
void	tunattach(int);
int	tunopen(dev_t, int, int, struct proc *);
int	tunclose(dev_t, int, int, struct proc *);
int	tun_ioctl(struct ifnet *, u_long, caddr_t);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
int	tunread(dev_t, struct uio *, int);
int	tunwrite(dev_t, struct uio *, int);
int	tunpoll(dev_t, int, struct proc *);
int	tunkqfilter(dev_t, struct knote *);
int	tun_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
struct	tun_softc *tun_lookup(int);
void	tun_wakeup(struct tun_softc *);
int	tun_switch(struct tun_softc *, int);

int	tuninit(struct tun_softc *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tunstart(struct ifnet *);
void	tun_link_state(struct tun_softc *);
int	tun_media_change(struct ifnet *);
void	tun_media_status(struct ifnet *, struct ifmediareq *);
141 
/* kqueue filter ops; the leading 1 sets the first member (f_isfd). */
struct filterops tunread_filtops =
	{ 1, NULL, filt_tunrdetach, filt_tunread};

struct filterops tunwrite_filtops =
	{ 1, NULL, filt_tunwdetach, filt_tunwrite};

/* list of all tun interfaces, keyed by unit; protected at splnet */
LIST_HEAD(, tun_softc) tun_softc_list;

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
152 
/*
 * Pseudo-device attach routine: initialize the softc list and register
 * the "tun" cloner.  Called once at autoconf time; n (the count from
 * the kernel config) is unused.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	if_clone_attach(&tun_cloner);
}
159 
/*
 * if_clone create callback: new interfaces start in layer 3 mode;
 * layer 2 mode is entered later via tun_switch() (IFF_LINK0).
 */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

/*
 * Back end of interface creation: allocate and zero the softc, cook up
 * a locally generated MAC address, and attach the ifnet either as a
 * layer 3 point-to-point tunnel (default) or, when flags has
 * TUN_LAYER2 set, as an Ethernet.  Returns 0 on success or ENOMEM.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	u_int32_t		 macaddr_rnd;
	int			 s;

	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!tp)
		return (ENOMEM);

	tp->tun_unit = unit;
	/* TUN_STAYUP is cleared again by tunopen() for on-demand devices */
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	tp->arpcom.ac_enaddr[0] = 0x00;
	tp->arpcom.ac_enaddr[1] = 0xbd;
	/*
	 * This no longer happens pre-scheduler so let's use the real
	 * random subsystem instead of random().
	 */
	macaddr_rnd = arc4random();
	bcopy(&macaddr_rnd, &tp->arpcom.ac_enaddr[2], sizeof(u_int32_t));
	tp->arpcom.ac_enaddr[5] = (u_char)unit + 1;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_softc = tp;
	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tunstart;
	ifp->if_hardmtu = TUNMRU;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	IFQ_SET_READY(&ifp->if_snd);

	/* dummy media, so SIOC[GS]IFMEDIA work in both modes */
	ifmedia_init(&tp->tun_media, 0, tun_media_change, tun_media_status);
	ifmedia_add(&tp->tun_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&tp->tun_media, IFM_ETHER | IFM_AUTO);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: AF is prepended to each packet as a 32 bit word */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = IFF_POINTOPOINT;
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);

		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: behave like an Ethernet interface */
		tp->tun_flags |= TUN_LAYER2;
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST|IFF_LINK0);
		ifp->if_capabilities = IFCAP_VLAN_MTU;

		if_attach(ifp);
		ether_ifattach(ifp);
	}
	/* force output function to our function */
	ifp->if_output = tun_output;

	s = splnet();
	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
	splx(s);

	return (0);
}

/*
 * if_clone destroy callback: wake any sleeping reader, invalidate
 * outstanding knotes, unlink the softc from the global list and detach
 * the interface.  Also called internally by tun_switch() when flipping
 * between layer 2 and layer 3 modes.  Always returns 0.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

	tun_wakeup(tp);

	/* splhigh because the knote lists are touched from filt_tun*detach */
	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	s = splnet();
	LIST_REMOVE(tp, tun_list);
	splx(s);

	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF);
	return (0);
}
263 
264 struct tun_softc *
265 tun_lookup(int unit)
266 {
267 	struct tun_softc *tp;
268 
269 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
270 		if (tp->tun_unit == unit)
271 			return (tp);
272 	return (NULL);
273 }

/*
 * Switch an interface between layer 2 and layer 3 operation by
 * destroying it and recreating it with the new flags under the same
 * unit number.  Interface group membership and the open/NBIO/ASYNC
 * state are saved across the destroy/create cycle and restored on the
 * new incarnation.  Returns 0 or an errno.
 */
int
tun_switch(struct tun_softc *tp, int flags)
{
	struct ifnet		*ifp = &tp->tun_if;
	int			 unit, open, r, s;
	struct ifg_list		*ifgl;
	u_int			ifgr_len;
	char			*ifgrpnames, *p;

	/* nothing to do if we are already in the requested mode */
	if ((tp->tun_flags & TUN_LAYER2) == (flags & TUN_LAYER2))
		return (0);

	/* tp will be removed so store unit number */
	unit = tp->tun_unit;
	open = tp->tun_flags & (TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	TUNDEBUG(("%s: switching to layer %d\n", ifp->if_xname,
		    flags & TUN_LAYER2 ? 2 : 3));

	/* remember joined groups: IFNAMSIZ slots, NUL-terminated list */
	ifgr_len = 0;
	ifgrpnames = NULL;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		ifgr_len += IFNAMSIZ;
	if (ifgr_len)
		ifgrpnames = malloc(ifgr_len + 1, M_TEMP, M_NOWAIT|M_ZERO);
	if (ifgrpnames) {
		p = ifgrpnames;
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
			strlcpy(p, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
			p += IFNAMSIZ;
		}
	}

	/* remove old device and ... */
	tun_clone_destroy(ifp);
	/* attach new interface */
	r = tun_create(&tun_cloner, unit, flags);

	if (r == 0) {
		if ((tp = tun_lookup(unit)) == NULL) {
			/* this should never fail */
			r = ENXIO;
			goto abort;
		}

		/* rejoin groups */
		ifp = &tp->tun_if;
		for (p = ifgrpnames; p && *p; p += IFNAMSIZ)
			if_addgroup(ifp, p);
	}
	if (open && r == 0) {
		/* already opened before ifconfig tunX link0 */
		s = splnet();
		tp->tun_flags |= open;
		tun_link_state(tp);
		splx(s);
		TUNDEBUG(("%s: already open\n", tp->tun_if.if_xname));
	}
 abort:
	if (ifgrpnames)
		free(ifgrpnames, M_TEMP);
	return (r);
}

/*
 * tunnel open - must be superuser & the device must be
 * configured in.  If the interface does not exist yet it is cloned on
 * demand (and marked for destruction on close by clearing TUN_STAYUP).
 * Only one open at a time is allowed (EBUSY otherwise); on success the
 * interface is marked RUNNING and brought up.
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 error, s;

	if ((error = suser(p, 0)) != 0)
		return (error);

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname)) != 0)
			return (error);

		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		/* on-demand devices disappear again when closed */
		tp->tun_flags &= ~TUN_STAYUP;
	}

	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;

	/* automatically UP the interface on open */
	s = splnet();
	ifp->if_flags |= IFF_RUNNING;
	tun_link_state(tp);
	if_up(ifp);
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}

/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 *  With STAYUP the interface survives: the owning pgid is cleared and
 *  any pollers are woken so they can observe the state change.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);

	/*
	 * junk all pending output
	 */
	s = splnet();
	ifp->if_flags &= ~IFF_RUNNING;
	tun_link_state(tp);
	IFQ_PURGE(&ifp->if_snd);
	splx(s);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	if (!(tp->tun_flags & TUN_STAYUP))
		return (if_clone_destroy(ifp->if_xname));
	else {
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
	}

	return (0);
}

/*
 * Re-derive the address-related softc flags (TUN_IASET, TUN_DSTADDR,
 * TUN_BRDADDR) from the interface address list and mark the interface
 * up and running.  Called from tun_ioctl() whenever addresses change.
 * Always returns 0.
 */
int
tuninit(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tuninit\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE; /* we are never active */

	/* start from a clean slate and re-set flags per address below */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			/* NOTE: "sin" here is really a sockaddr_in6 */
			struct sockaddr_in6 *sin;

			sin = (struct sockaddr_in6 *)ifa->ifa_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
				if (sin &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}

/*
 * Process an ioctl request (network-side, via ifp->if_ioctl).
 * Runs entirely at splnet.  Layer 2/3 specific behaviour: address
 * ioctls additionally init ARP in layer 2 mode, SIOCSIFFLAGS uses
 * IFF_LINK0 to switch layers, and unknown ioctls fall through to
 * ether_ioctl() only in layer 2 mode.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		tuninit(tp);
		TUNDEBUG(("%s: address set\n", ifp->if_xname));
		if (tp->tun_flags & TUN_LAYER2)
			switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
			case AF_INET:
				arp_ifinit(&tp->arpcom, (struct ifaddr *)data);
				break;
#endif
			default:
				break;
			}
		break;
	case SIOCSIFDSTADDR:
		tuninit(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFBRDADDR:
		tuninit(tp);
		TUNDEBUG(("%s: broadcast address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		if (ifr == 0) {
			error = EAFNOSUPPORT;	   /* XXX */
			break;
		}

		if (tp->tun_flags & TUN_LAYER2) {
			error = (cmd == SIOCADDMULTI) ?
			    ether_addmulti(ifr, &tp->arpcom) :
			    ether_delmulti(ifr, &tp->arpcom);
			if (error == ENETRESET) {
				/*
				 * Multicast list has changed; set the hardware
				 * filter accordingly. The good thing is we do
				 * not have a hardware filter (:
				 */
				error = 0;
			}
			break;
		}

		/* layer 3: just validate the address family */
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:
			break;
#endif
#ifdef INET6
		case AF_INET6:
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* IFF_LINK0 selects layer 2 mode; may recreate the ifp */
		error = tun_switch(tp,
		    ifp->if_flags & IFF_LINK0 ? TUN_LAYER2 : 0);
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &tp->tun_media, cmd);
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	splx(s);
	return (error);
}

/*
 * tun_output - queue packets from higher level ready to put out.
 * In layer 2 mode this defers to ether_output() (which ends up in
 * tunstart()); in layer 3 mode the destination address family is
 * prepended as a 32 bit header, the packet queued on if_snd and the
 * reader woken up.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, len, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		/* call ether_output and that will call tunstart at the end */
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the address family, matching the DLT_LOOP bpf header */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	s = splnet();

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* remember the length before enqueue; m0 may be gone afterwards */
	len = m0->m_pkthdr.len;
	IFQ_ENQUEUE(&ifp->if_snd, m0, NULL, error);
	if (error) {
		splx(s);
		ifp->if_collisions++;
		return (error);
	}
	splx(s);
	ifp->if_opackets++;
	ifp->if_obytes += len;

	tun_wakeup(tp);
	return (0);
}

/*
 * Notify the userland side that a packet is available: wake a reader
 * sleeping in tunread(), deliver SIGIO if FIOASYNC was requested, and
 * wake select/poll/kevent waiters.
 */
void
tun_wakeup(struct tun_softc *tp)
{
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)tp);
	}
	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
		csignal(tp->tun_pgid, SIGIO,
		    tp->tun_siguid, tp->tun_sigeuid);
	selwakeup(&tp->tun_rsel);
}

/*
 * the cdevsw interface is now pretty minimal.
 * Character-device ioctls: tun-specific TUNS*/TUNG* commands plus the
 * usual FIONBIO/FIOASYNC/FIONREAD and pgrp handling.  The MAC address
 * get/set variants (SIOC[GS]IFADDR) are only valid in layer 2 mode.
 * Runs at splnet.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only TUN_IFF_FLAGS bits may be changed from userland */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast must be set */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* report the size of the next packet, not the whole queue */
		IFQ_POLL(&tp->tun_if.if_snd, m);
		if (m != NULL)
			*(int *)data = m->m_pkthdr.len;
		else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember who set the pgid so SIGIO delivery can be checked */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_cred->p_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case OSIOCGIFADDR:
	case SIOCGIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
		splx(s);
		return (ENOTTY);
	}
	splx(s);
	return (0);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.  Blocks (interruptibly)
 * until a packet is queued unless non-blocking I/O was requested.
 * Any part of the packet that does not fit in the caller's buffer
 * is dropped, not saved for the next read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0, len, s;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: read\n", ifp->if_xname));
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, tp->tun_flags));
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	do {
		/* wait for the interface to become ready again if needed */
		while ((tp->tun_flags & TUN_READY) != TUN_READY)
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* sleep until tun_wakeup() signals a packet */
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m0 == NULL);
	splx(s);

	/* copy out mbuf by mbuf, freeing each one as it is consumed */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	if (m0 != NULL) {
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;

	return (error);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 * Builds an mbuf chain from the user's buffer and injects it into the
 * stack: via ether_input_mbuf() in layer 2 mode, or onto the protocol
 * input queue selected by the leading 32 bit address-family header in
 * layer 3 mode.
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			 isr;
	int			 error=0, s, tlen, mlen;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/* whole packet must fit in MTU plus link header; no empty writes */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* fill the chain, allocating additional mbufs as needed */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		if (top != NULL)
			m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		s = splnet();
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
		splx(s);
	}
#endif

	if (tp->tun_flags & TUN_LAYER2) {
		/* quirk to not add randomness from a virtual device */
		atomic_setbits_int(&netisr, (1 << NETISR_RND_DONE));

		s = splnet();
		ether_input_mbuf(ifp, top);
		splx(s);

		ifp->if_ipackets++; /* ibytes are counted in ether_input */

		return (0);
	}

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);
	top->m_pkthdr.rdomain = ifp->if_rdomain;

	/* dispatch on the address family written by userland */
	switch (ntohl(*th)) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		isr = NETISR_IPV6;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		ifq = &atintrq2;
		isr = NETISR_ATALK;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		if (!ifq->ifq_congestion)
			if_congestion(ifq);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	schednetisr(isr);
	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;
	splx(s);
	return (error);
}

/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	int			 revents, s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (POLLERR);

	ifp = &tp->tun_if;
	revents = 0;
	s = splnet();
	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));

	if (events & (POLLIN | POLLRDNORM)) {
		/* readable iff there is a packet on the send queue */
		IFQ_POLL(&ifp->if_snd, m);
		if (m != NULL) {
			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname,
			    ifp->if_snd.ifq_len));
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
			selrecord(p, &tp->tun_rsel);
		}
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);
	splx(s);
	return (revents);
}

/*
 * kqueue(2) support.
 *
 * The tun driver uses an array of tun_softc's based on the minor number
 * of the device.  kn->kn_hook gets set to the specific tun_softc.
 *
 * filt_tunread() sets kn->kn_data to the iface qsize
 * filt_tunwrite() sets kn->kn_data to the MTU size
 */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	int			 s;
	struct klist		*klist;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;

	s = splnet();
	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
	splx(s);

	switch (kn->kn_filter) {
		case EVFILT_READ:
			klist = &tp->tun_rsel.si_note;
			kn->kn_fop = &tunread_filtops;
			break;
		case EVFILT_WRITE:
			klist = &tp->tun_wsel.si_note;
			kn->kn_fop = &tunwrite_filtops;
			break;
		default:
			return (EPERM);	/* 1 */
	}

	kn->kn_hook = (caddr_t)tp;

	/* splhigh: knote lists are also touched from klist_invalidate() */
	s = splhigh();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);

	return (0);
}

/*
 * Detach a read knote; skip the removal if klist_invalidate() already
 * marked it detached (e.g. during interface destruction).
 */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
	splx(s);
}

/*
 * Read filter: ready when a packet sits on the send queue; kn_data
 * reports the queue length.  Detached knotes report ready with
 * kn_data 0 so waiters notice the interface went away.
 */
int
filt_tunread(struct knote *kn, long hint)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m;

	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	s = splnet();
	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		splx(s);
		kn->kn_data = ifp->if_snd.ifq_len;

		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
		    ifp->if_snd.ifq_len));
		return (1);
	}
	splx(s);
	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
	return (0);
}

/*
 * Detach a write knote; mirror of filt_tunrdetach() for tun_wsel.
 */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*tp;

	tp = (struct tun_softc *)kn->kn_hook;
	s = splhigh();
	if (!(kn->kn_status & KN_DETACHED))
		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
	splx(s);
}
1118 
1119 int
1120 filt_tunwrite(struct knote *kn, long hint)
1121 {
1122 	struct tun_softc	*tp;
1123 	struct ifnet		*ifp;
1124 
1125 	if (kn->kn_status & KN_DETACHED) {
1126 		kn->kn_data = 0;
1127 		return (1);
1128 	}
1129 
1130 	tp = (struct tun_softc *)kn->kn_hook;
1131 	ifp = &tp->tun_if;
1132 
1133 	kn->kn_data = ifp->if_mtu;
1134 
1135 	return (1);
1136 }

/*
 * Start packet transmission on the interface.
 * when the interface queue is rate-limited by ALTQ or TBR,
 * if_start is needed to drain packets from the queue in order
 * to notify readers when outgoing packets become ready.
 * In layer 2 mode this function is called from ether_output.
 */
void
tunstart(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	struct mbuf		*m;

	splassert(IPL_NET);

	/* layer 3 without ALTQ/TBR: tun_output already did the wakeup */
	if (!(tp->tun_flags & TUN_LAYER2) &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd) &&
	    !TBR_IS_ENABLED(&ifp->if_snd))
		return;

	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		if (tp->tun_flags & TUN_LAYER2) {
#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
			ifp->if_opackets++;
		}
		tun_wakeup(tp);
	}
}
1170 
1171 void
1172 tun_link_state(struct tun_softc *tp)
1173 {
1174 	struct ifnet *ifp = &tp->tun_if;
1175 	int link_state = LINK_STATE_DOWN;
1176 
1177 	if (tp->tun_flags & TUN_OPEN) {
1178 		if (tp->tun_flags & TUN_LAYER2)
1179 			link_state = LINK_STATE_FULL_DUPLEX;
1180 		else
1181 			link_state = LINK_STATE_UP;
1182 	}
1183 	if (ifp->if_link_state != link_state) {
1184 		ifp->if_link_state = link_state;
1185 		if_link_state_change(ifp);
1186 	}
1187 }

/*
 * ifmedia change callback: the media is fake, so requests are
 * accepted and ignored.
 */
int
tun_media_change(struct ifnet *ifp)
{
	/* Ignore */
	return (0);
}

/*
 * ifmedia status callback: always report ether/autoselect, and mark
 * the media active when the link is up and the interface is UP.
 */
void
tun_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct tun_softc *tp = ifp->if_softc;

	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;

	/* refresh link state before reporting it */
	tun_link_state(tp);

	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
	    ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE;
}
1210