xref: /netbsd-src/sys/net/if_tun.c (revision bada23909e740596d0a3785a73bd3583a9807fb8)
1 /*	$NetBSD: if_tun.c,v 1.37 1999/03/04 02:38:31 mjacob Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5  * Nottingham University 1987.
6  *
7  * This source may be freely distributed, however I would be interested
8  * in any changes that are made.
9  *
10  * This driver takes packets off the IP i/f and hands them up to a
11  * user process to have its wicked way with. This driver has its
12  * roots in a similar driver written by Phil Cockcroft (formerly) at
13  * UCL. This driver is based much more on read/write/poll mode of
14  * operation though.
15  */
16 
17 #include "tun.h"
18 #if NTUN > 0
19 
20 #include "opt_inet.h"
21 #include "opt_ns.h"
22 
23 #include <sys/param.h>
24 #include <sys/proc.h>
25 #include <sys/systm.h>
26 #include <sys/mbuf.h>
27 #include <sys/buf.h>
28 #include <sys/protosw.h>
29 #include <sys/socket.h>
30 #include <sys/ioctl.h>
31 #include <sys/errno.h>
32 #include <sys/syslog.h>
33 #include <sys/select.h>
34 #include <sys/poll.h>
35 #include <sys/file.h>
36 #include <sys/signalvar.h>
37 #include <sys/conf.h>
38 
39 #include <machine/cpu.h>
40 
41 #include <net/if.h>
42 #include <net/if_ether.h>
43 #include <net/netisr.h>
44 #include <net/route.h>
45 
46 
47 #ifdef INET
48 #include <netinet/in.h>
49 #include <netinet/in_systm.h>
50 #include <netinet/in_var.h>
51 #include <netinet/ip.h>
52 #include <netinet/if_inarp.h>
53 #endif
54 
55 #ifdef NS
56 #include <netns/ns.h>
57 #include <netns/ns_if.h>
58 #endif
59 
60 #include "bpfilter.h"
61 #if NBPFILTER > 0
62 #include <sys/time.h>
63 #include <net/bpf.h>
64 #endif
65 
66 #include <net/if_tun.h>
67 
/*
 * Debug printf: produces output only while tundebug is non-zero.
 * The macro expands to a bare `if', so it must not be used as the
 * unbraced body of an if that has an else.
 */
#define TUNDEBUG	if (tundebug) printf
int	tundebug = 0;

/* One softc per configured tunnel unit; indexed by minor device number. */
struct tun_softc tunctl[NTUN];
extern int ifqmaxlen;
void	tunattach __P((int));

/* Interface-side entry points that tunattach() stores in each ifnet. */
int	tun_ioctl __P((struct ifnet *, u_long, caddr_t));
int	tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
		       struct rtentry *rt));

/* Recomputes the softc address flags from the interface address list. */
static void tuninit __P((struct tun_softc *));
80 
81 void
82 tunattach(unused)
83 	int unused;
84 {
85 	register int i;
86 	struct ifnet *ifp;
87 
88 	for (i = 0; i < NTUN; i++) {
89 		tunctl[i].tun_flags = TUN_INITED;
90 
91 		ifp = &tunctl[i].tun_if;
92 		sprintf(ifp->if_xname, "tun%d", i);
93 		ifp->if_softc = &tunctl[i];
94 		ifp->if_mtu = TUNMTU;
95 		ifp->if_ioctl = tun_ioctl;
96 		ifp->if_output = tun_output;
97 		ifp->if_flags = IFF_POINTOPOINT;
98 		ifp->if_snd.ifq_maxlen = ifqmaxlen;
99 		ifp->if_collisions = 0;
100 		ifp->if_ierrors = 0;
101 		ifp->if_oerrors = 0;
102 		ifp->if_ipackets = 0;
103 		ifp->if_opackets = 0;
104 		if_attach(ifp);
105 #if NBPFILTER > 0
106 		bpfattach(&tunctl[i].tun_bpf, ifp, DLT_NULL, sizeof(u_int32_t));
107 #endif
108 	}
109 }
110 
111 /*
112  * tunnel open - must be superuser & the device must be
113  * configured in
114  */
115 int
116 tunopen(dev, flag, mode, p)
117 	dev_t	dev;
118 	int	flag, mode;
119 	struct proc *p;
120 {
121 	struct ifnet	*ifp;
122 	struct tun_softc *tp;
123 	register int	unit, error;
124 
125 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
126 		return (error);
127 
128 	if ((unit = minor(dev)) >= NTUN)
129 		return (ENXIO);
130 	tp = &tunctl[unit];
131 	if (tp->tun_flags & TUN_OPEN)
132 		return ENXIO;
133 	ifp = &tp->tun_if;
134 	tp->tun_flags |= TUN_OPEN;
135 	TUNDEBUG("%s: open\n", ifp->if_xname);
136 	return (0);
137 }
138 
139 /*
140  * tunclose - close the device - mark i/f down & delete
141  * routing info
142  */
143 int
144 tunclose(dev, flag, mode, p)
145 	dev_t	dev;
146 	int	flag;
147 	int	mode;
148 	struct proc *p;
149 {
150 	register int	unit = minor(dev), s;
151 	struct tun_softc *tp = &tunctl[unit];
152 	struct ifnet	*ifp = &tp->tun_if;
153 	struct mbuf	*m;
154 
155 	tp->tun_flags &= ~TUN_OPEN;
156 
157 	/*
158 	 * junk all pending output
159 	 */
160 	do {
161 		s = splimp();
162 		IF_DEQUEUE(&ifp->if_snd, m);
163 		splx(s);
164 		if (m)
165 			m_freem(m);
166 	} while (m);
167 
168 	if (ifp->if_flags & IFF_UP) {
169 		s = splimp();
170 		if_down(ifp);
171 		if (ifp->if_flags & IFF_RUNNING) {
172 			/* find internet addresses and delete routes */
173 			register struct ifaddr *ifa;
174 			for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
175 			    ifa = ifa->ifa_list.tqe_next) {
176 				if (ifa->ifa_addr->sa_family == AF_INET) {
177 					rtinit(ifa, (int)RTM_DELETE,
178 					       tp->tun_flags & TUN_DSTADDR
179 							? RTF_HOST
180 							: 0);
181 				}
182 			}
183 		}
184 		splx(s);
185 	}
186 	tp->tun_pgrp = 0;
187 	selwakeup(&tp->tun_rsel);
188 
189 	TUNDEBUG ("%s: closed\n", ifp->if_xname);
190 	return (0);
191 }
192 
193 static void
194 tuninit(tp)
195 	struct tun_softc *tp;
196 {
197 	struct ifnet	*ifp = &tp->tun_if;
198 	register struct ifaddr *ifa;
199 
200 	TUNDEBUG("%s: tuninit\n", ifp->if_xname);
201 
202 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
203 
204 	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
205 	for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
206 	     ifa = ifa->ifa_list.tqe_next) {
207 		if (ifa->ifa_addr->sa_family == AF_INET) {
208 			struct sockaddr_in *sin;
209 
210 			sin = satosin(ifa->ifa_addr);
211 			if (sin && sin->sin_addr.s_addr)
212 				tp->tun_flags |= TUN_IASET;
213 
214 			if (ifp->if_flags & IFF_POINTOPOINT) {
215 				sin = satosin(ifa->ifa_dstaddr);
216 				if (sin && sin->sin_addr.s_addr)
217 					tp->tun_flags |= TUN_DSTADDR;
218 			}
219 		}
220 	}
221 
222 	return;
223 }
224 
225 /*
226  * Process an ioctl request.
227  */
228 int
229 tun_ioctl(ifp, cmd, data)
230 	struct ifnet *ifp;
231 	u_long cmd;
232 	caddr_t	data;
233 {
234 	int		error = 0, s;
235 
236 	s = splimp();
237 	switch(cmd) {
238 	case SIOCSIFADDR:
239 		tuninit((struct tun_softc *)(ifp->if_softc));
240 		TUNDEBUG("%s: address set\n", ifp->if_xname);
241 		break;
242 	case SIOCSIFDSTADDR:
243 		tuninit((struct tun_softc *)(ifp->if_softc));
244 		TUNDEBUG("%s: destination address set\n", ifp->if_xname);
245 		break;
246 	case SIOCSIFBRDADDR:
247 		TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
248 		break;
249 	case SIOCSIFMTU: {
250 		struct ifreq *ifr = (struct ifreq *) data;
251 		if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
252 		    error = EINVAL;
253 		    break;
254 		}
255 		TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
256 		ifp->if_mtu = ifr->ifr_mtu;
257 		break;
258 	}
259 	case SIOCADDMULTI:
260 	case SIOCDELMULTI: {
261 		struct ifreq *ifr = (struct ifreq *) data;
262 		if (ifr == 0) {
263 	        	error = EAFNOSUPPORT;           /* XXX */
264 			break;
265 		}
266 		switch (ifr->ifr_addr.sa_family) {
267 
268 #ifdef INET
269 		case AF_INET:
270 			break;
271 #endif
272 
273 		default:
274 			error = EAFNOSUPPORT;
275 			break;
276 		}
277 		break;
278 	}
279 	default:
280 		error = EINVAL;
281 	}
282 	splx(s);
283 	return (error);
284 }
285 
286 /*
287  * tun_output - queue packets from higher level ready to put out.
288  */
289 int
290 tun_output(ifp, m0, dst, rt)
291 	struct ifnet   *ifp;
292 	struct mbuf    *m0;
293 	struct sockaddr *dst;
294 	struct rtentry *rt;
295 {
296 	struct tun_softc *tp = ifp->if_softc;
297 	struct proc	*p;
298 	int		s;
299 
300 	TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
301 
302 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
303 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
304 			  tp->tun_flags);
305 		m_freem (m0);
306 		return (EHOSTDOWN);
307 	}
308 
309 #if NBPFILTER > 0
310 	if (tp->tun_bpf) {
311 		/*
312 		 * We need to prepend the address family as
313 		 * a four byte field.  Cons up a dummy header
314 		 * to pacify bpf.  This is safe because bpf
315 		 * will only read from the mbuf (i.e., it won't
316 		 * try to free it or keep a pointer to it).
317 		 */
318 		struct mbuf m;
319 		u_int32_t af = dst->sa_family;
320 
321 		m.m_next = m0;
322 		m.m_len = sizeof(af);
323 		m.m_data = (char *)&af;
324 
325 		bpf_mtap(tp->tun_bpf, &m);
326 	}
327 #endif
328 
329 	switch(dst->sa_family) {
330 #ifdef INET
331 	case AF_INET:
332 		if (tp->tun_flags & TUN_PREPADDR) {
333 			/* Simple link-layer header */
334 			M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
335 			if (m0 == NULL) {
336 				IF_DROP(&ifp->if_snd);
337 				return (ENOBUFS);
338 			}
339 			bcopy(dst, mtod(m0, char *), dst->sa_len);
340 		}
341 		/* FALLTHROUGH */
342 	case AF_UNSPEC:
343 		s = splimp();
344 		if (IF_QFULL(&ifp->if_snd)) {
345 			IF_DROP(&ifp->if_snd);
346 			m_freem(m0);
347 			splx(s);
348 			ifp->if_collisions++;
349 			return (ENOBUFS);
350 		}
351 		IF_ENQUEUE(&ifp->if_snd, m0);
352 		splx(s);
353 		ifp->if_opackets++;
354 		break;
355 #endif
356 	default:
357 		m_freem(m0);
358 		return (EAFNOSUPPORT);
359 	}
360 
361 	if (tp->tun_flags & TUN_RWAIT) {
362 		tp->tun_flags &= ~TUN_RWAIT;
363 		wakeup((caddr_t)tp);
364 	}
365 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
366 		if (tp->tun_pgrp > 0)
367 			gsignal(tp->tun_pgrp, SIGIO);
368 		else if ((p = pfind(-tp->tun_pgrp)) != NULL)
369 			psignal(p, SIGIO);
370 	}
371 	selwakeup(&tp->tun_rsel);
372 	return (0);
373 }
374 
375 /*
376  * the cdevsw interface is now pretty minimal.
377  */
378 int
379 tunioctl(dev, cmd, data, flag, p)
380 	dev_t		dev;
381 	u_long		cmd;
382 	caddr_t		data;
383 	int		flag;
384 	struct proc	*p;
385 {
386 	int		unit = minor(dev), s;
387 	struct tun_softc *tp = &tunctl[unit];
388 
389 	switch (cmd) {
390 	case TUNSDEBUG:
391 		tundebug = *(int *)data;
392 		break;
393 
394 	case TUNGDEBUG:
395 		*(int *)data = tundebug;
396 		break;
397 
398 	case TUNSIFMODE:
399 		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
400 		case IFF_POINTOPOINT:
401 		case IFF_BROADCAST:
402 			s = splimp();
403 			if (tp->tun_if.if_flags & IFF_UP) {
404 				splx(s);
405 				return (EBUSY);
406 			}
407 			tp->tun_if.if_flags &=
408 				~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
409 			tp->tun_if.if_flags |= *(int *)data;
410 			splx(s);
411 			break;
412 		default:
413 			return (EINVAL);
414 			break;
415 		}
416 		break;
417 
418 	case TUNSLMODE:
419 		if (*(int *)data)
420 			tp->tun_flags |= TUN_PREPADDR;
421 		else
422 			tp->tun_flags &= ~TUN_PREPADDR;
423 		break;
424 
425 	case FIONBIO:
426 		if (*(int *)data)
427 			tp->tun_flags |= TUN_NBIO;
428 		else
429 			tp->tun_flags &= ~TUN_NBIO;
430 		break;
431 
432 	case FIOASYNC:
433 		if (*(int *)data)
434 			tp->tun_flags |= TUN_ASYNC;
435 		else
436 			tp->tun_flags &= ~TUN_ASYNC;
437 		break;
438 
439 	case FIONREAD:
440 		s = splimp();
441 		if (tp->tun_if.if_snd.ifq_head)
442 			*(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
443 		else
444 			*(int *)data = 0;
445 		splx(s);
446 		break;
447 
448 	case TIOCSPGRP:
449 		tp->tun_pgrp = *(int *)data;
450 		break;
451 
452 	case TIOCGPGRP:
453 		*(int *)data = tp->tun_pgrp;
454 		break;
455 
456 	default:
457 		return (ENOTTY);
458 	}
459 	return (0);
460 }
461 
462 /*
463  * The cdevsw read interface - reads a packet at a time, or at
464  * least as much of a packet as can be read.
465  */
466 int
467 tunread(dev, uio, ioflag)
468 	dev_t		dev;
469 	struct uio	*uio;
470 	int		ioflag;
471 {
472 	int		unit = minor(dev);
473 	struct tun_softc *tp = &tunctl[unit];
474 	struct ifnet	*ifp = &tp->tun_if;
475 	struct mbuf	*m, *m0;
476 	int		error=0, len, s;
477 
478 	TUNDEBUG ("%s: read\n", ifp->if_xname);
479 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
480 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
481 		return EHOSTDOWN;
482 	}
483 
484 	tp->tun_flags &= ~TUN_RWAIT;
485 
486 	s = splimp();
487 	do {
488 		IF_DEQUEUE(&ifp->if_snd, m0);
489 		if (m0 == 0) {
490 			if (tp->tun_flags & TUN_NBIO) {
491 				splx(s);
492 				return (EWOULDBLOCK);
493 			}
494 			tp->tun_flags |= TUN_RWAIT;
495 			if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
496 				splx(s);
497 				return (EINTR);
498 			}
499 		}
500 	} while (m0 == 0);
501 	splx(s);
502 
503 	while (m0 && uio->uio_resid > 0 && error == 0) {
504 		len = min(uio->uio_resid, m0->m_len);
505 		if (len == 0)
506 			break;
507 		error = uiomove(mtod(m0, caddr_t), len, uio);
508 		MFREE(m0, m);
509 		m0 = m;
510 	}
511 
512 	if (m0) {
513 		TUNDEBUG("Dropping mbuf\n");
514 		m_freem(m0);
515 	}
516 	if (error)
517 		ifp->if_ierrors++;
518 	return (error);
519 }
520 
521 /*
522  * the cdevsw write interface - an atomic write is a packet - or else!
523  */
524 int
525 tunwrite(dev, uio, ioflag)
526 	dev_t		dev;
527 	struct uio	*uio;
528 	int		ioflag;
529 {
530 	int		unit = minor (dev);
531 	struct tun_softc *tp = &tunctl[unit];
532 	struct ifnet	*ifp = &tp->tun_if;
533 	struct mbuf	*top, **mp, *m;
534 	struct ifqueue	*ifq;
535 	struct sockaddr	dst;
536 	int		isr, error=0, s, tlen, mlen;
537 
538 	TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
539 
540 	if (tp->tun_flags & TUN_PREPADDR) {
541 		if (uio->uio_resid < sizeof(dst))
542 			return (EIO);
543 		error = uiomove((caddr_t)&dst, sizeof(dst), uio);
544 		if (dst.sa_len > sizeof(dst)) {
545 			/* Duh.. */
546 			char discard;
547 			int n = dst.sa_len - sizeof(dst);
548 			while (n--)
549 				if ((error = uiomove(&discard, 1, uio)) != 0)
550 					return (error);
551 		}
552 	} else {
553 #ifdef INET
554 		dst.sa_family = AF_INET;
555 #endif
556 	}
557 
558 	if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
559 		TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
560 		    (unsigned long)uio->uio_resid);
561 		return (EIO);
562 	}
563 
564 	switch (dst.sa_family) {
565 #ifdef INET
566 	case AF_INET:
567 		ifq = &ipintrq;
568 		isr = NETISR_IP;
569 		break;
570 #endif
571 	default:
572 		return (EAFNOSUPPORT);
573 	}
574 
575 	tlen = uio->uio_resid;
576 
577 	/* get a header mbuf */
578 	MGETHDR(m, M_DONTWAIT, MT_DATA);
579 	if (m == NULL)
580 		return (ENOBUFS);
581 	mlen = MHLEN;
582 
583 	top = 0;
584 	mp = &top;
585 	while (error == 0 && uio->uio_resid > 0) {
586 		m->m_len = min(mlen, uio->uio_resid);
587 		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
588 		*mp = m;
589 		mp = &m->m_next;
590 		if (uio->uio_resid > 0) {
591 			MGET (m, M_DONTWAIT, MT_DATA);
592 			if (m == 0) {
593 				error = ENOBUFS;
594 				break;
595 			}
596 			mlen = MLEN;
597 		}
598 	}
599 	if (error) {
600 		if (top)
601 			m_freem (top);
602 		ifp->if_ierrors++;
603 		return (error);
604 	}
605 
606 	top->m_pkthdr.len = tlen;
607 	top->m_pkthdr.rcvif = ifp;
608 
609 #if NBPFILTER > 0
610 	if (tp->tun_bpf) {
611 		/*
612 		 * We need to prepend the address family as
613 		 * a four byte field.  Cons up a dummy header
614 		 * to pacify bpf.  This is safe because bpf
615 		 * will only read from the mbuf (i.e., it won't
616 		 * try to free it or keep a pointer to it).
617 		 */
618 		struct mbuf m;
619 		u_int32_t af = AF_INET;
620 
621 		m.m_next = top;
622 		m.m_len = sizeof(af);
623 		m.m_data = (char *)&af;
624 
625 		bpf_mtap(tp->tun_bpf, &m);
626 	}
627 #endif
628 
629 	s = splimp();
630 	if (IF_QFULL(ifq)) {
631 		IF_DROP(ifq);
632 		splx(s);
633 		ifp->if_collisions++;
634 		m_freem(top);
635 		return (ENOBUFS);
636 	}
637 	IF_ENQUEUE(ifq, top);
638 	splx(s);
639 	ifp->if_ipackets++;
640 	schednetisr(isr);
641 	return (error);
642 }
643 
644 /*
645  * tunpoll - the poll interface, this is only useful on reads
646  * really. The write detect always returns true, write never blocks
647  * anyway, it either accepts the packet or drops it.
648  */
649 int
650 tunpoll(dev, events, p)
651 	dev_t		dev;
652 	int		events;
653 	struct proc	*p;
654 {
655 	int		unit = minor(dev), s;
656 	struct tun_softc *tp = &tunctl[unit];
657 	struct ifnet	*ifp = &tp->tun_if;
658 	int		revents = 0;
659 
660 	s = splimp();
661 	TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
662 
663 	if (events & (POLLIN | POLLRDNORM)) {
664 		if (ifp->if_snd.ifq_len > 0) {
665 			TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
666 			    ifp->if_snd.ifq_len);
667 			revents |= events & (POLLIN | POLLRDNORM);
668 		} else {
669 			TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
670 			selrecord(p, &tp->tun_rsel);
671 		}
672 	}
673 
674 	if (events & (POLLOUT | POLLWRNORM))
675 		revents |= events & (POLLOUT | POLLWRNORM);
676 
677 	splx(s);
678 	return (revents);
679 }
680 
681 #endif  /* NTUN */
682